; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2     | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2   | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx      | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2     | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-unknown   -mattr=+avx2     | FileCheck %s --check-prefixes=X86

declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

; All four versions (intrinsic, vector, mixed, and scalar) are semantically equivalent and should produce the same asm as the scalar version.

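; Reduce the element-wise compare with the llvm.vector.reduce.and intrinsic.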
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %cmp = icmp eq <2 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %cmp = icmp eq <4 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %cmp = icmp eq <8 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  ret i1 %all_eq
}

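; Same check expressed by bitcasting the <N x i1> compare result to an iN value and comparing it against zero.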
define i1 @vector_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <2 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <2 x i1> %any_ne to i2
  %all_eq = icmp eq i2 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <4 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <4 x i1> %any_ne to i4
  %all_eq = icmp eq i4 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <8 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <8 x i1> %any_ne to i8
  %all_eq = icmp eq i8 %any_ne_scalar, 0
  ret i1 %all_eq
}

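; Same check expressed by bitcasting the loaded vectors to scalar integers before a single scalar compare.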
define i1 @mixed_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <2 x i8> %lhs to i16
  %rhs_s = bitcast <2 x i8> %rhs to i16
  %all_eq = icmp eq i16 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <4 x i8> %lhs to i32
  %rhs_s = bitcast <4 x i8> %rhs to i32
  %all_eq = icmp eq i32 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <8 x i8> %lhs to i64
  %rhs_s = bitcast <8 x i8> %rhs to i64
  %all_eq = icmp eq i64 %lhs_s, %rhs_s
  ret i1 %all_eq
}

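; Plain scalar loads and compares; the vector forms above should produce the same asm as these.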
define i1 @scalar_version_i16(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i16:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i16:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i16, ptr %arg1, align 1
  %rhs = load i16, ptr %arg, align 1
  %all_eq = icmp eq i16 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i32(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i32:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i32:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i32, ptr %arg1, align 1
  %rhs = load i32, ptr %arg, align 1
  %all_eq = icmp eq i32 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i64(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i64:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i64:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i64, ptr %arg1, align 1
  %rhs = load i64, ptr %arg, align 1
  %all_eq = icmp eq i64 %lhs, %rhs
  ret i1 %all_eq
}