; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

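; Each test below checks that SelectionDAG known-bits / demanded-elts analysis
; propagates through a particular vector operation and simplifies the output.

; Element 0 is masked to 4 bits, so its sign bit is known zero and the
; sign-extension should fold away to a plain mask.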
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_extract_sext:
; X86:       # %bb.0:
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

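; Element 0 is masked to 16 bits, so the i64 value is known to fit in 32 bits
; and the u64->f32 conversion can lower as a cheap signed 32-bit convert.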
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: knownbits_mask_extract_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

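; Both demanded lanes come from i16 zero-extensions, so they are known
; non-negative and the uitofp can lower as a signed convert.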
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X86-LABEL: knownbits_insert_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovd %ecx, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32>  %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

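; The demanded high lanes are masked to 4 bits, so their sign bits are known
; zero and the sext can become a zero-extend (unpack with zero).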
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_sext:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

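; Same fold as above, but the known bits must survive two shuffles.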
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

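; Lane 3 of the first shuffle is undef, so its sign bit is not known zero and
; the sign-extension is expected to remain (vpmovsxwd).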
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpmovsxwd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

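; The demanded lanes 2 and 3 are masked to at most 12 bits, so the uitofp can
; lower as a signed convert.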
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

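; After the mask, the OR forces the demanded lanes to exactly 65535, so the
; whole sequence should constant-fold to a splat of 65535.0.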
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_or_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

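; The XOR can only toggle the low 16 bits of the masked demanded lanes, so the
; values stay below 2^16 and the uitofp can lower as a signed convert.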
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

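; The demanded lanes have their low 16 bits cleared, so the shl by 17 shifts
; every set bit out and the result is known zero.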
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_shl_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

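; The demanded lanes are at most 17 bits, so after the ashr by 15 only two low
; bits remain and the lshr by 30 is known zero.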
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

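; The low 16 bits of the demanded lanes stay zero through the multiply, so the
; shl by 22 should fold to zero.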
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_mul_shuffle_shl:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

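; Truncation preserves the 16 known-zero low bits of the demanded lanes, so
; the shl by 22 should again fold to zero.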
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X86-LABEL: knownbits_mask_trunc_shuffle_shl:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

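; The demanded lanes of both addends fit in 15 bits, so the sum fits in 16
; bits and the lshr by 17 is known zero.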
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_add_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

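; 255 minus a 4-bit value fits in 8 bits, so the lshr by 22 of the demanded
; lanes is known zero.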
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_sub_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

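; An unsigned divide cannot grow the 15-bit dividend in the demanded lanes, so
; the lshr by 22 is known zero.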
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

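; urem by 16 leaves at most 4 bits, so the lshr by 22 is known zero.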
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_urem_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

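; The remainder is no larger than the 15-bit dividend in the demanded lanes,
; so the lshr by 22 is known zero.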
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_urem_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

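; The demanded lanes are multiples of 32768 and hence of 16, so the srem is
; known zero and the whole result folds to zero.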
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_srem_shuffle_lshr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

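; bswap moves the 15 known bits of the demanded lanes into the upper half,
; leaving the low 16 bits zero, so the shl by 22 should fold to zero.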
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_bswap_shuffle_shl:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

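; Every demanded lane of the concatenated vector is masked to 17 bits, so the
; 256-bit uitofp can lower as a signed convert.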
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_concat_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X86-NEXT:    vandps %xmm2, %xmm1, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT:    vandps %xmm2, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X64-NEXT:    vandps %xmm2, %xmm1, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vandps %xmm2, %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

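; The i64 lshr clears bit 63, so the odd i32 lanes picked by the shuffle are
; known non-negative and the uitofp can lower as a signed convert.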
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

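; The smax against non-negative constants makes the demanded lanes known
; non-negative, so the uitofp can lower as a signed convert.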
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

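; umin bounds the demanded lanes below 2^18, so their sign bits are known zero
; and the uitofp can lower as a signed convert.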
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_umin_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

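; umax with -1 forces the demanded lanes to all-ones, so the ashr by 31 should
; fold to an all-ones vector.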
define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X86-LABEL: knownbits_umax_shuffle_ashr:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

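; The mask bounds the demanded lanes below 2^18 and the umax keeps them there,
; so the uitofp can lower as a signed convert.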
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

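; Clearing bit 0 before the bitreverse clears the sign bit afterwards, so the
; ashr by 31 is known zero.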
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X86-LABEL: knownbits_mask_bitreverse_ashr:
; X86:       # %bb.0:
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN, we can't combine to sitofp.
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_abs_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpabsd %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X86-NEXT:    vpsrld $16, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X86-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

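; The OR sets the low bit of the demanded lanes, so the input cannot be
; INT_MIN; abs is then known non-negative and the uitofp can lower as a signed
; convert after vpabsd.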
define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_or_abs_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpabsd %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

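; Both select arms mask the demanded lanes to at most 16 bits, so the select
; result is known non-negative and the uitofp can lower as a signed convert.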
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: knownbits_and_select_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovaps 8(%ebp), %xmm3
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm2
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

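; One select arm is shifted right and the other masked, so the demanded lanes
; are known non-negative and the uitofp can lower as a signed convert.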
define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovaps 8(%ebp), %xmm3
; X86-NEXT:    vpsrld $5, %xmm2, %xmm2
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

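; The extracted lanes are shifted right, so their sign bits are known zero and
; the uitofp to double can use the signed vcvtdq2pd.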
define <2 x double> @knownbits_lshr_subvector_uitofp(<4 x i32> %x)  {
; X86-LABEL: knownbits_lshr_subvector_uitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpsrld $2, %xmm0, %xmm1
; X86-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_subvector_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %xmm0, %xmm1
; X64-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 0, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = uitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}