; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.

declare i32 @llvm.bitreverse.i32(i32) readnone
declare i64 @llvm.bitreverse.i64(i64) readnone
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
declare i32 @llvm.bswap.i32(i32) readnone

; fold (bitreverse undef) -> undef
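; Editorial note: bitreverse is a pure, bijective bit permutation, so reversing an
; undef value may itself be folded to undef and no code needs to be emitted.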
define i32 @test_undef() nounwind {
; X86-LABEL: test_undef:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: test_undef:
; X64:       # %bb.0:
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 undef)
  ret i32 %b
}

; fold (bitreverse (bitreverse x)) -> x
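; Editorial note: reversal is an involution. Bit i maps to bit (31-i), and applying
; the mapping twice restores every bit, so the two calls cancel and only the return
; of %a0 remains in the expected output.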
define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = call i32 @llvm.bitreverse.i32(i32 %b)
  ret i32 %c
}

; fold (bitreverse (srl (bitreverse c), x)) -> (shl c, x)
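; Editorial sketch of why the fold holds, in terms of bit indices (k = shift amount):
;   bit i of bitreverse(c)                       = bit (31-i) of c
;   bit i of lshr(bitreverse(c), k)              = bit (31-i-k) of c   (0 for i > 31-k)
;   bit i of bitreverse(lshr(bitreverse(c), k))  = bit (i-k) of c      (0 for i < k)
; which is exactly shl(c, k); hence the single shll $7 expected below.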
define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $7, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = lshr i32 %b, 7
  %d = call i32 @llvm.bitreverse.i32(i32 %c)
  ret i32 %d
}

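; Editorial note: the same fold on i64 with a shift amount of 33 collapses to
; shl i64 %a, 33. On i686 the i64 result lives in edx:eax, so the expected code
; zeroes %eax and forms the high half with a single addl (the low dword of %a
; shifted left by one).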
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $33, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
  %2 = lshr i64 %1, 33
  %3 = call i64 @llvm.bitreverse.i64(i64 %2)
  ret i64 %3
}

; fold (bitreverse (shl (bitreverse c), x)) -> (srl c, x)
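; Editorial note: this is the mirror image of the srl case above. Shifting the
; reversed value left by k and reversing again places bit (i+k) of c at bit i,
; which is a logical shift right by k (here shrl $7).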
define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $7, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = shl i32 %b, 7
  %d = call i32 @llvm.bitreverse.i32(i32 %c)
  ret i32 %d
}

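; Editorial note: i64 variant of the shl fold. The pair of reverses around
; shl i64 %1, 33 becomes lshr i64 %a, 33, so on i686 only the high dword of %a
; survives, shifted right by one into %eax, with %edx zeroed for the upper half.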
define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shrq $33, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
  %2 = shl i64 %1, 33
  %3 = call i64 @llvm.bitreverse.i64(i64 %2)
  ret i64 %3
}

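; Editorial note: the 'or' pins only the sign bit of each lane, bitreverse moves that
; bit down to bit 0, and the 'and' with -2 then clears it. Demanded-bits simplification
; can therefore drop the 'or' entirely, which is why neither output below contains an
; or/vpor against a constant, only the bitreverse expansion followed by the final mask.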
define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
; X86-LABEL: test_demandedbits_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X86-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; X86-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT:    packuswb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    psllw $4, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $2, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    psllw $2, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    paddb %xmm0, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_demandedbits_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; X64-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; X64-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; X64-NEXT:    vpshufb %xmm0, %xmm1, %xmm0
; X64-NEXT:    vpor %xmm0, %xmm2, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %b = or <4 x i32> %a0, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %b)
  %d = and <4 x i32> %c, <i32 -2, i32 -2, i32 -2, i32 -2>
  ret <4 x i32> %d
}
