; xref: /llvm-project/llvm/test/CodeGen/X86/freeze-unary.ll (revision 27241435515554bc21105713c9d34cf886c5bced)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,X64

; sext -> freeze -> sext should lower to a single sign extension (one movsbl).
define i32 @freeze_sext(i8 %a0) nounwind {
; X86-LABEL: freeze_sext:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_sext:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    retq
  %x = sext i8 %a0 to i16
  %y = freeze i16 %x
  %z = sext i16 %y to i32
  ret i32 %z
}

; Vector form: sext -> freeze -> sext still lowers as one i8->i32 sign extension.
define <4 x i32> @freeze_sext_vec(<4 x i8> %a0) nounwind {
; X86-LABEL: freeze_sext_vec:
; X86:       # %bb.0:
; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    psrad $24, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_sext_vec:
; X64:       # %bb.0:
; X64-NEXT:    pmovsxbd %xmm0, %xmm0
; X64-NEXT:    retq
  %x = sext <4 x i8> %a0 to <4 x i16>
  %y = freeze <4 x i16> %x
  %z = sext <4 x i16> %y to <4 x i32>
  ret <4 x i32> %z
}

; zext -> freeze -> zext should lower to a single zero extension (one movzbl).
define i32 @freeze_zext(i8 %a0) nounwind {
; X86-LABEL: freeze_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_zext:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    retq
  %x = zext i8 %a0 to i16
  %y = freeze i16 %x
  %z = zext i16 %y to i32
  ret i32 %z
}

; Vector form: zext -> freeze -> zext still lowers as one i16->i64 zero extension.
define <2 x i64> @freeze_zext_vec(<2 x i16> %a0) nounwind {
; X86-LABEL: freeze_zext_vec:
; X86:       # %bb.0:
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: freeze_zext_vec:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    retq
  %x = zext <2 x i16> %a0 to <2 x i32>
  %y = freeze <2 x i32> %x
  %z = zext <2 x i32> %y to <2 x i64>
  ret <2 x i64> %z
}

; bswap(freeze(bswap(x))) cancels out: only the argument move remains.
define i32 @freeze_bswap(i32 %a0) nounwind {
; X86-LABEL: freeze_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %x = call i32 @llvm.bswap.i32(i32 %a0)
  %y = freeze i32 %x
  %z = call i32 @llvm.bswap.i32(i32 %y)
  ret i32 %z
}
declare i32 @llvm.bswap.i32(i32)

; Vector bswap pair through a freeze folds to a no-op (argument already in xmm0).
define <4 x i32> @freeze_bswap_vec(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_bswap_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0)
  %y = freeze <4 x i32> %x
  %z = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %y)
  ret <4 x i32> %z
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; bitreverse(freeze(bitreverse(x))) cancels out: only the argument move remains.
define i32 @freeze_bitreverse(i32 %a0) nounwind {
; X86-LABEL: freeze_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %x = call i32 @llvm.bitreverse.i32(i32 %a0)
  %y = freeze i32 %x
  %z = call i32 @llvm.bitreverse.i32(i32 %y)
  ret i32 %z
}
declare i32 @llvm.bitreverse.i32(i32)

; Vector bitreverse pair through a freeze folds to a no-op.
define <4 x i32> @freeze_bitreverse_vec(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_bitreverse_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a0)
  %y = freeze <4 x i32> %x
  %z = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %y)
  ret <4 x i32> %z
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)

; split parity pattern
; ctpop + freeze + 'and 1' is still recognized as parity (testb/cmpb + setnp).
define i8 @freeze_ctpop(i8 %a0) nounwind {
; X86-LABEL: freeze_ctpop:
; X86:       # %bb.0:
; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
; X86-NEXT:    setnp %al
; X86-NEXT:    retl
;
; X64-LABEL: freeze_ctpop:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    setnp %al
; X64-NEXT:    retq
  %x = call i8 @llvm.ctpop.i8(i8 %a0)
  %y = freeze i8 %x
  %z = and i8 %y, 1
  ret i8 %z
}
declare i8 @llvm.ctpop.i8(i8)

; Vector case currently emits the full popcount expansion plus a final mask
; (no vector parity idiom exists to fold to).
define <16 x i8> @freeze_ctpop_vec(<16 x i8> %a0) nounwind {
; X86-LABEL: freeze_ctpop_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pand %xmm1, %xmm2
; X86-NEXT:    psrlw $2, %xmm0
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    paddb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    paddb %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_ctpop_vec:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X64-NEXT:    movdqa %xmm1, %xmm4
; X64-NEXT:    pshufb %xmm3, %xmm4
; X64-NEXT:    psrlw $4, %xmm0
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pshufb %xmm0, %xmm1
; X64-NEXT:    paddb %xmm4, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %x = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a0)
  %y = freeze <16 x i8> %x
  %z = and <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %z
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

; parity knownbits pattern
; Masking with 1 both before and after the freeze still folds to a single
; parity check (same output as freeze_ctpop above).
define i8 @freeze_parity(i8 %a0) nounwind {
; X86-LABEL: freeze_parity:
; X86:       # %bb.0:
; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
; X86-NEXT:    setnp %al
; X86-NEXT:    retl
;
; X64-LABEL: freeze_parity:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    setnp %al
; X64-NEXT:    retq
  %x = call i8 @llvm.ctpop.i8(i8 %a0)
  %y = and i8 %x, 1
  %z = freeze i8 %y
  %w = and i8 %z, 1
  ret i8 %w
}

; Vector variant of the parity knownbits pattern.
; NOTE(fix): return %w (the value re-masked after the freeze) instead of %z,
; matching the scalar freeze_parity test; previously %w was computed but dead.
; The second 'and 1' folds away in codegen (output matches freeze_ctpop_vec),
; so the autogenerated checks below are unaffected.
define <16 x i8> @freeze_parity_vec(<16 x i8> %a0) nounwind {
; X86-LABEL: freeze_parity_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pand %xmm1, %xmm2
; X86-NEXT:    psrlw $2, %xmm0
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    paddb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    paddb %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_parity_vec:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X64-NEXT:    movdqa %xmm1, %xmm4
; X64-NEXT:    pshufb %xmm3, %xmm4
; X64-NEXT:    psrlw $4, %xmm0
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pshufb %xmm0, %xmm1
; X64-NEXT:    paddb %xmm4, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %x = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a0)
  %y = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %z = freeze <16 x i8> %y
  %w = and <16 x i8> %z, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %w
}
