; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX2

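; The inserted elements below are sign extensions of values no wider than 32
; bits, so the i64 sources of the sitofp are known to fit in i32 and the
; conversions can use vcvtdq2pd/vcvtdq2ps instead of scalarized i64 conversions.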
define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X86-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X86-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovd %ecx, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

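; Each element is arithmetically shifted by at least 33 bits, leaving 34+ known
; sign bits, so the v4i64 -> v4f64 conversion only needs the low 32 bits of each
; element and lowers to shuffles plus vcvtdq2pd.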
define <4 x double> @signbits_ashr_sitofp_0(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X86-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [268435456,134217728]
; X86-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [1073741824,536870912]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_0:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [268435456,134217728]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [1073741824,536870912]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vpmovsxdq {{.*#+}} ymm1 = [1073741824,536870912,268435456,134217728]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 33, i64 34, i64 35, i64 36>
  %2 = sitofp <4 x i64> %1 to <4 x double>
  ret <4 x double> %2
}

; PR45794
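; A uniform ashr by 48 leaves 49 known sign bits per element, so the conversion
; can be done on the sign-extended upper i32 halves (vpsrad $16 plus a shuffle
; of the odd lanes) instead of scalarizing the v4i64 sitofp.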
define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrad $16, %xmm1, %xmm1
; X86-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 48, i64 48, i64 48, i64 48>
  %2 = sitofp <4 x i64> %1 to <4 x float>
  ret <4 x float> %2
}

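; After an ashr by 32 or more, the extracted i64 lane is already a
; sign-extended i32 value, so the scalar sitofp lowers to a 32-bit conversion
; of the upper half of the lane.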
define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

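; Sign bits are tracked through the shl that follows the ashr: the extracted
; element keeps more than 32 known sign bits, so the ashr becomes a 32-bit
; vpsrad and the extract+sitofp becomes a single vcvtdq2ps.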
define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_shl_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpsllq $20, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    sarl $30, %eax
; X86-NEXT:    vpslld $2, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

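; Known sign bits from the sext survive the shufflevector, so the reversed
; v4i64 lanes can still be converted via a 32-bit shuffle and vcvtdq2pd.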
define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: signbits_sext_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64>%a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
; X86-LABEL: signbits_sext_shl_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0
; X86-NEXT:    vpsllq $5, %xmm0, %xmm1
; X86-NEXT:    vpsllq $11, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shl_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shl_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = sext <2 x i16> %a0 to <2 x i64>
  %2 = shl <2 x i64> %1, <i64 11, i64 5>
  %3 = sitofp <2 x i64> %2 to <2 x double>
  ret <2 x double> %3
}

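; The two ashr-by-16 steps combine to an ashr by 32 across the concat/split
; shuffles, so only the demanded lanes survive and the result is just the high
; i32 halves of %a0 converted with vcvtdq2pd.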
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X86-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

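; Both select operands carry at least 33 known sign bits (an ashr by >= 33 and
; a sext from i32), so after the select and splat shuffle the v4i64 -> v4f32
; conversion is done on the low 32-bit halves with vcvtdq2ps.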
define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovapd 8(%ebp), %xmm3
; X86-NEXT:    vpsrad $31, %xmm2, %xmm4
; X86-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; X86-NEXT:    vpsrad $1, %xmm5, %xmm5
; X86-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X86-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-NEXT:    vpsrad $1, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X86-NEXT:    vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X86-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
; X86-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm4
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpsrad $1, %xmm5, %xmm5
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; X64-AVX2-NEXT:    vpsrad $1, %ymm2, %ymm2
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; X64-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}

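; In the min/max tests only the sign bit of the llvm.smax/smin/umax/umin result
; is ultimately demanded (the splatted value is further shifted and then masked
; with constants that keep only sign-bit copies), so the intrinsics fold to a
; logical and/or and the two shifts combine into a single vpsrad $25.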
define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smax:
; X86:       # %bb.0:
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smin:
; X86:       # %bb.0:
; X86-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umax:
; X86:       # %bb.0:
; X86-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umin:
; X86:       # %bb.0:
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

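; The scalar compares produce an all-ones or all-zero mask, so the sext of the
; i1 result and the ashr of the extracted mask lane in the *_int variants are
; known to be redundant and no extra sign-extension code is emitted.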
define i32 @signbits_cmpss(float %0, float %1) {
; X86-LABEL: signbits_cmpss:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpss:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    retq
  %3 = fcmp oeq float %0, %1
  %4 = sext i1 %3 to i32
  ret i32 %4
}

define i32 @signbits_cmpss_int(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: signbits_cmpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vextractps $0, %xmm0, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %3 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %0, <4 x float> %1, i8 0)
  %4 = bitcast <4 x float> %3 to <4 x i32>
  %5 = extractelement <4 x i32> %4, i32 0
  %6 = ashr i32 %5, 31
  ret i32 %6
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8 immarg)

define i64 @signbits_cmpsd(double %0, double %1) {
; X86-LABEL: signbits_cmpsd:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcmpeqsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = fcmp oeq double %0, %1
  %4 = sext i1 %3 to i64
  ret i64 %4
}

define i64 @signbits_cmpsd_int(<2 x double> %0, <2 x double> %1) {
; X86-LABEL: signbits_cmpsd_int:
; X86:       # %bb.0:
; X86-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vextractps $1, %xmm0, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd_int:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %0, <2 x double> %1, i8 0)
  %4 = bitcast <2 x double> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = ashr i64 %5, 63
  ret i64 %6
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg)

; Make sure we can preserve sign bit information into the second basic block
; so we can avoid having to shift bit 0 into bit 7 for each element due to
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
; ComputeNumSignBits handling for insert_subvector.
define void @cross_bb_signbits_insert_subvec(ptr %ptr, <32 x i8> %x, <32 x i8> %z) {
; X86-LABEL: cross_bb_signbits_insert_subvec:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X86-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovaps %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpblendvb %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %a = icmp eq <32 x i8> %x, zeroinitializer
  %b = icmp eq <32 x i8> %x, zeroinitializer
  %c = and <32 x i1> %a, %b
  br label %block

block:
  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
  store <32 x i8> %d, ptr %ptr, align 32
  br label %exit

exit:
  ret void
}
