xref: /llvm-project/llvm/test/CodeGen/X86/combine-or.ll (revision 90e9895a9373b3d83eefe15b34d2dc83c7bcc88f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefixes=CHECK,SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -early-live-intervals | FileCheck %s -check-prefixes=CHECK,SSE
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2
6
; Scalar x | x. The expected output contains no or instruction at all: it only
; copies the incoming argument register (edi) into the return register (eax).
7define i32 @or_self(i32 %x) {
8; CHECK-LABEL: or_self:
9; CHECK:       # %bb.0:
10; CHECK-NEXT:    movl %edi, %eax
11; CHECK-NEXT:    retq
12  %or = or i32 %x, %x
13  ret i32 %or
14}
15
; Vector form of x | x on <4 x i32>. The expected output is a bare return, i.e.
; no instruction is emitted for the or.
16define <4 x i32> @or_self_vec(<4 x i32> %x) {
17; CHECK-LABEL: or_self_vec:
18; CHECK:       # %bb.0:
19; CHECK-NEXT:    retq
20  %or = or <4 x i32> %x, %x
21  ret <4 x i32> %or
22}
23
24; fold (or x, c) -> c iff (x & ~c) == 0
25
; Each lane is an i32 zero-extended to i64, so only its low 32 bits can be set,
; and all of those bits are set in the constant 4294967295. The or is therefore
; expected to collapse to the constant: every run only materializes the splat
; constant in xmm0 and returns, never reading the input. The three check groups
; differ only in the instruction used to materialize it.
26define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
27; SSE-LABEL: or_zext_v2i32:
28; SSE:       # %bb.0:
29; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
30; SSE-NEXT:    retq
31;
32; AVX1-LABEL: or_zext_v2i32:
33; AVX1:       # %bb.0:
34; AVX1-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
35; AVX1-NEXT:    retq
36;
37; AVX2-LABEL: or_zext_v2i32:
38; AVX2:       # %bb.0:
39; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = [4294967295,4294967295]
40; AVX2-NEXT:    # xmm0 = mem[0,0]
41; AVX2-NEXT:    retq
42  %1 = zext <2 x i32> %a0 to <2 x i64>
43  %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
44  ret <2 x i64> %2
45}
46
; Same pattern as the v2i32 case, one size down: i16 lanes zero-extended to i32
; and or'ed with 65535 (all 16 low bits set). The expected output only loads the
; splat constant 65535 into xmm0; the input is never read. Both AVX runs share
; one check group here.
47define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
48; SSE-LABEL: or_zext_v4i16:
49; SSE:       # %bb.0:
50; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
51; SSE-NEXT:    retq
52;
53; AVX-LABEL: or_zext_v4i16:
54; AVX:       # %bb.0:
55; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [65535,65535,65535,65535]
56; AVX-NEXT:    retq
57  %1 = zext <4 x i16> %a0 to <4 x i32>
58  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
59  ret <4 x i32> %2
60}
61
62; fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
63
; Scalar i32 instance of the pattern with C1 = 8 and C2 = -11, giving
; C1|C2 = -3. The expected output masks %x (edi, copied to eax) with -3 and
; %y (esi) with -11, then ors the two: the original or of %x and %y is gone.
64define i32 @or_and_and_i32(i32 %x, i32 %y) {
65; CHECK-LABEL: or_and_and_i32:
66; CHECK:       # %bb.0:
67; CHECK-NEXT:    movl %edi, %eax
68; CHECK-NEXT:    andl $-11, %esi
69; CHECK-NEXT:    andl $-3, %eax
70; CHECK-NEXT:    orl %esi, %eax
71; CHECK-NEXT:    retq
72  %xy = or i32 %x, %y
73  %mx = and i32 %x, 8
74  %mxy = and i32 %xy, -11
75  %r = or i32 %mx, %mxy
76  ret i32 %r
77}
78
; i64 variant with the operands of the final or swapped (%mxy first, %mx second).
; Here C1 = 8 and C2 = -3; bit 3 is already set in -3, so C1|C2 == C2 and both
; inputs end up under the same mask. The expected output is therefore a single
; or of the two arguments followed by a single and with -3.
79define i64 @or_and_and_commute_i64(i64 %x, i64 %y) {
80; CHECK-LABEL: or_and_and_commute_i64:
81; CHECK:       # %bb.0:
82; CHECK-NEXT:    movq %rdi, %rax
83; CHECK-NEXT:    orq %rsi, %rax
84; CHECK-NEXT:    andq $-3, %rax
85; CHECK-NEXT:    retq
86  %xy = or i64 %x, %y
87  %mx = and i64 %x, 8
88  %mxy = and i64 %xy, -3
89  %r = or i64 %mxy, %mx
90  ret i64 %r
91}
92
; <4 x i32> variant with a different constant in every lane of both masks.
; The expected output is one and per input register against a constant-pool
; operand, followed by one or. The constant-pool labels are matched by regex,
; so the mask values themselves are not verified by these checks.
93define <4 x i32> @or_and_and_v4i32(<4 x i32> %x, <4 x i32> %y) {
94; SSE-LABEL: or_and_and_v4i32:
95; SSE:       # %bb.0:
96; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
97; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
98; SSE-NEXT:    orps %xmm1, %xmm0
99; SSE-NEXT:    retq
100;
101; AVX-LABEL: or_and_and_v4i32:
102; AVX:       # %bb.0:
103; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
104; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
105; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
106; AVX-NEXT:    retq
107  %xy = or <4 x i32> %x, %y
108  %mx = and <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
109  %mxy = and <4 x i32> %xy, <i32 1, i32 -1, i32 -5, i32 -25>
110  %r = or <4 x i32> %mx, %mxy
111  ret <4 x i32> %r
112}
113
; Same constants as or_and_and_i32 (8 and -11), but %mxy has a second use: it
; is passed to use_i32. The expected output keeps the original shape: it
; computes (x|y) & -11 in esi and x & 8 in edi, and combines them with leal
; (an add gives the same result as an or because 8 and -11 share no set bits).
; The combined value is held in rbx across the call and returned afterwards.
114define i32 @or_and_and_multiuse_i32(i32 %x, i32 %y) nounwind {
115; CHECK-LABEL: or_and_and_multiuse_i32:
116; CHECK:       # %bb.0:
117; CHECK-NEXT:    pushq %rbx
118; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
119; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
120; CHECK-NEXT:    orl %edi, %esi
121; CHECK-NEXT:    andl $8, %edi
122; CHECK-NEXT:    andl $-11, %esi
123; CHECK-NEXT:    leal (%rdi,%rsi), %ebx
124; CHECK-NEXT:    movl %esi, %edi
125; CHECK-NEXT:    callq use_i32@PLT
126; CHECK-NEXT:    movl %ebx, %eax
127; CHECK-NEXT:    popq %rbx
128; CHECK-NEXT:    retq
129  %xy = or i32 %x, %y
130  %mx = and i32 %x, 8
131  %mxy = and i32 %xy, -11
132  %r = or i32 %mx, %mxy
133  call void @use_i32(i32 %mxy)
134  ret i32 %r
135}
136
; Counterpart of or_and_and_multiuse_i32 where the value with the extra use is
; %mx (x & 8) instead of %mxy. The expected output again keeps both ands and
; the inner or, combines the two masked values with leal into ebx, and passes
; x & 8 (already in edi) to use_i32 before returning the combined value.
137define i32 @or_and_multiuse_and_i32(i32 %x, i32 %y) nounwind {
138; CHECK-LABEL: or_and_multiuse_and_i32:
139; CHECK:       # %bb.0:
140; CHECK-NEXT:    pushq %rbx
141; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
142; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
143; CHECK-NEXT:    orl %edi, %esi
144; CHECK-NEXT:    andl $8, %edi
145; CHECK-NEXT:    andl $-11, %esi
146; CHECK-NEXT:    leal (%rsi,%rdi), %ebx
147; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
148; CHECK-NEXT:    callq use_i32@PLT
149; CHECK-NEXT:    movl %ebx, %eax
150; CHECK-NEXT:    popq %rbx
151; CHECK-NEXT:    retq
152  %xy = or i32 %x, %y
153  %mx = and i32 %x, 8
154  %mxy = and i32 %xy, -11
155  %r = or i32 %mx, %mxy
156  call void @use_i32(i32 %mx)
157  ret i32 %r
158}
159
; Both masked values have an extra use: %mx and then %mxy are each passed to
; use_i32. The expected output keeps (x|y) & -11 in ebx and the combined
; result in ebp so that both survive the two calls; the first call receives
; x & 8 (left in edi), the second receives the value saved in ebx.
160define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
161; CHECK-LABEL: or_and_multiuse_and_multiuse_i32:
162; CHECK:       # %bb.0:
163; CHECK-NEXT:    pushq %rbp
164; CHECK-NEXT:    pushq %rbx
165; CHECK-NEXT:    pushq %rax
166; CHECK-NEXT:    movl %esi, %ebx
167; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
168; CHECK-NEXT:    orl %edi, %ebx
169; CHECK-NEXT:    andl $8, %edi
170; CHECK-NEXT:    andl $-11, %ebx
171; CHECK-NEXT:    leal (%rdi,%rbx), %ebp
172; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
173; CHECK-NEXT:    callq use_i32@PLT
174; CHECK-NEXT:    movl %ebx, %edi
175; CHECK-NEXT:    callq use_i32@PLT
176; CHECK-NEXT:    movl %ebp, %eax
177; CHECK-NEXT:    addq $8, %rsp
178; CHECK-NEXT:    popq %rbx
179; CHECK-NEXT:    popq %rbp
180; CHECK-NEXT:    retq
181  %xy = or i32 %x, %y
182  %mx = and i32 %x, 8
183  %mxy = and i32 %xy, -11
184  %r = or i32 %mx, %mxy
185  call void @use_i32(i32 %mx)
186  call void @use_i32(i32 %mxy)
187  ret i32 %r
188}
189
; Builds an i64 out of two separately inverted i32 values: ~%a1 zero-extended
; and shifted into the high 32 bits, or'ed with ~%a0 zero-extended in the low
; 32 bits. The expected output assembles the pair from the un-inverted inputs
; (shlq on rsi, 32-bit move of edi, orq) and applies a single 64-bit notq to
; the combined value instead of two 32-bit inversions.
190define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
191; CHECK-LABEL: or_build_pair_not:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
194; CHECK-NEXT:    shlq $32, %rsi
195; CHECK-NEXT:    movl %edi, %eax
196; CHECK-NEXT:    orq %rsi, %rax
197; CHECK-NEXT:    notq %rax
198; CHECK-NEXT:    retq
199  %n0 = xor i32 %a0, -1
200  %n1 = xor i32 %a1, -1
201  %x0 = zext i32 %n0 to i64
202  %x1 = zext i32 %n1 to i64
203  %hi = shl i64 %x1, 32
204  %r = or i64 %hi, %x0
205  ret i64 %r
206}
207
208define i64 @PR89533(<64 x i8> %a0) {
209; SSE-LABEL: PR89533:
210; SSE:       # %bb.0:
211; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
212; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
213; SSE-NEXT:    pmovmskb %xmm0, %eax
214; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
215; SSE-NEXT:    pcmpeqb %xmm4, %xmm1
216; SSE-NEXT:    pmovmskb %xmm1, %ecx
217; SSE-NEXT:    notl %ecx
218; SSE-NEXT:    shll $16, %ecx
219; SSE-NEXT:    orl %eax, %ecx
220; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
221; SSE-NEXT:    pmovmskb %xmm2, %eax
222; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
223; SSE-NEXT:    pcmpeqb %xmm4, %xmm3
224; SSE-NEXT:    pmovmskb %xmm3, %edx
225; SSE-NEXT:    notl %edx
226; SSE-NEXT:    shll $16, %edx
227; SSE-NEXT:    orl %eax, %edx
228; SSE-NEXT:    shlq $32, %rdx
229; SSE-NEXT:    orq %rcx, %rdx
230; SSE-NEXT:    movl $64, %eax
231; SSE-NEXT:    rep bsfq %rdx, %rax
232; SSE-NEXT:    retq
233;
234; AVX1-LABEL: PR89533:
235; AVX1:       # %bb.0:
236; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
237; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm3
238; AVX1-NEXT:    vpmovmskb %xmm3, %eax
239; AVX1-NEXT:    xorl $65535, %eax # imm = 0xFFFF
240; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
241; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
242; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
243; AVX1-NEXT:    notl %ecx
244; AVX1-NEXT:    shll $16, %ecx
245; AVX1-NEXT:    orl %eax, %ecx
246; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm0
247; AVX1-NEXT:    vpmovmskb %xmm0, %eax
248; AVX1-NEXT:    xorl $65535, %eax # imm = 0xFFFF
249; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
250; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
251; AVX1-NEXT:    vpmovmskb %xmm0, %edx
252; AVX1-NEXT:    notl %edx
253; AVX1-NEXT:    shll $16, %edx
254; AVX1-NEXT:    orl %eax, %edx
255; AVX1-NEXT:    shlq $32, %rdx
256; AVX1-NEXT:    orq %rcx, %rdx
257; AVX1-NEXT:    movl $64, %eax
258; AVX1-NEXT:    rep bsfq %rdx, %rax
259; AVX1-NEXT:    vzeroupper
260; AVX1-NEXT:    retq
261;
262; AVX2-LABEL: PR89533:
263; AVX2:       # %bb.0:
264; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
265; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
266; AVX2-NEXT:    vpmovmskb %ymm0, %eax
267; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm0
268; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
269; AVX2-NEXT:    shlq $32, %rcx
270; AVX2-NEXT:    orq %rax, %rcx
271; AVX2-NEXT:    notq %rcx
272; AVX2-NEXT:    xorl %eax, %eax
273; AVX2-NEXT:    tzcntq %rcx, %rax
274; AVX2-NEXT:    vzeroupper
275; AVX2-NEXT:    retq
276  %cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
277  %mask = bitcast <64 x i1> %cmp to i64
278  %tz = tail call i64 @llvm.cttz.i64(i64 %mask, i1 false)
279  ret i64 %tz
280}
281
; External function with no body in this file. The multiuse tests call it to
; give an intermediate value a second use besides the final or.
282declare void @use_i32(i32)
283
284