; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
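; To regenerate the CHECK lines after editing the IR below, run the script
; named above from an llvm-project checkout (exact invocation may vary with
; your setup):
;   python3 llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/freeze-vector.ll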

define <2 x i64> @freeze_insert_vector_elt(<2 x i64> %a0) {
; CHECK-LABEL: freeze_insert_vector_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %idx0 = insertelement <2 x i64> %a0, i64 0, i64 0
  %freeze0 = freeze <2 x i64> %idx0
  %idx1 = insertelement <2 x i64> %freeze0, i64 0, i64 1
  %freeze1 = freeze <2 x i64> %idx1
  ret <2 x i64> %freeze1
}

define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %y = freeze <8 x i32> %x
  %z = shufflevector <8 x i32> %y, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %z
}

define <2 x i64> @freeze_sign_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_sign_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = sext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <2 x i64> @freeze_zero_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_zero_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %x = zext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = zext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x i32> %x
  %z = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %z
}

define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
; CHECK-LABEL: freeze_permilps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x float> %x
  %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %z
}

define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  store <8 x i8> %i1, ptr %escape
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  store <4 x i16> %i1, ptr %escape
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
; X86-LABEL: freeze_extractelement:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovdqa (%edx), %xmm0
; X86-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}
define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_escape:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovdqa (%esi), %xmm0
; X86-NEXT:    vpand (%edx), %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_escape:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  store <16 x i8> %i3, ptr %escape
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}

; It would be a miscompilation to pull freeze out of extractelement here.
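; Both extracts use variable indices, so they must observe the *same* frozen
; vector. A hedged sketch of the unsound rewrite this guards against
; (hypothetical IR, not part of the test): sinking the freeze below the
; extracts would give
;   %e0 = extractelement <16 x i8> %i2, i64 %idx0
;   %e1 = extractelement <16 x i8> %i2, i64 %idx1
;   %f0 = freeze i8 %e0
;   %f1 = freeze i8 %e1
; where each scalar freeze may choose a different value for the same poison
; element, so the icmp below could yield false even when %idx0 == %idx1.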
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movl 32(%ebp), %edx
; X86-NEXT:    movl 12(%ebp), %esi
; X86-NEXT:    movl 8(%ebp), %edi
; X86-NEXT:    vmovaps (%edi), %xmm0
; X86-NEXT:    vandps (%esi), %xmm0, %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esp)
; X86-NEXT:    movzbl (%esp,%ecx), %ecx
; X86-NEXT:    cmpb (%esp,%eax), %cl
; X86-NEXT:    sete (%edx)
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_extra_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    andl $15, %edx
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandps (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -24(%rsp,%rdx), %eax
; X64-NEXT:    cmpb -24(%rsp,%rcx), %al
; X64-NEXT:    sete (%r8)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 %idx0
  %i5 = extractelement <16 x i8> %i3, i64 %idx1
  %i6 = icmp eq i8 %i4, %i5
  store i1 %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 42, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 %i0, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_frozen_buildvectors:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vmovd %edx, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_frozen_buildvectors:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  %i7 = freeze <4 x i32> %i6
  store <4 x i32> %i7, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT:    # xmm0 = mem[0,0]
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
; X86-NEXT:    vmovdqa %xmm2, (%ecx)
; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i64, ptr %origin0
  %i0 = and i64 %i0.src, 15
  %i1.src = load i64, ptr %origin1
  %i1 = and i64 %i0.src, 15
  %i2 = insertelement <2 x i64> poison, i64 %i0, i64 0
  %i3 = and <2 x i64> %i2, <i64 7, i64 7>
  %i4 = freeze <2 x i64> %i3
  store <2 x i64> %i4, ptr %dst0
  %i5 = insertelement <2 x i64> poison, i64 %i1, i64 1
  %i6 = and <2 x i64> %i5, <i64 7, i64 7>
  store <2 x i64> %i6, ptr %dst1
  ret void
}

define void @freeze_buildvector(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_one_undef_elt(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i7 = insertelement <4 x i32> %i5, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_extrause(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_buildvector_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_extrause:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r9)
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  store <4 x i32> %i7, ptr %escape
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @pr59677(i32 %x, ptr %out) nounwind {
; X86-LABEL: pr59677:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    calll sinf
; X86-NEXT:    fstps (%esi)
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pr59677:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    callq sinf@PLT
; X64-NEXT:    vmovss %xmm0, (%rbx)
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %i0 = or i32 %x, 1
  %i1 = insertelement <4 x i32> zeroinitializer, i32 %x, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 %i0, i64 1
  %i3 = shl <4 x i32> %i2, <i32 1, i32 1, i32 1, i32 1>
  %i4 = sitofp <4 x i32> %i3 to <4 x float>
  %i5 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %i4)
  %i6 = extractelement <4 x float> %i5, i64 0
  store float %i6, ptr %out, align 4
  ret void
}
declare <4 x float> @llvm.sin.v4f32(<4 x float>)

; Test that we can eliminate freeze by changing the BUILD_VECTOR to a splat
; zero vector.
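; <5 x i8> has no simple MVT, so this also exercises the non-simple-type
; lowering path. A hedged sketch of the fold (not itself checked here):
; freeze may choose 0 for the poison/undef lanes, so
;   %i0 = freeze <5 x i8> <i8 poison, i8 0, i8 0, i8 undef, i8 0>
; folds to <5 x i8> zeroinitializer, which the CHECK lines below store as a
; 32-bit zero plus one zero byte.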
define void @freeze_buildvector_not_simple_type(ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_not_simple_type:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_not_simple_type:
; X64:       # %bb.0:
; X64-NEXT:    movb $0, 4(%rdi)
; X64-NEXT:    movl $0, (%rdi)
; X64-NEXT:    retq
  %i0 = freeze <5 x i8> <i8 poison, i8 0, i8 0, i8 undef, i8 0>
  store <5 x i8> %i0, ptr %dst
  ret void
}