xref: /llvm-project/llvm/test/CodeGen/X86/memcmp-optsize.ll (revision 4dfea22e771a0944b3b313f2790a616fa79257e1)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
5
6; This tests codegen time inlining/optimization of memcmp
7; rdar://6480398
8
9@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
10
11declare dso_local i32 @memcmp(ptr, ptr, i64)
12declare dso_local i32 @bcmp(ptr, ptr, i64)
13
; memcmp(X, Y, 2) should inline to two zero-extended 16-bit loads, byte swaps
; (rolw $8) to recover big-endian compare order, and a 32-bit subtract -- no libcall.
14define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
15; X64-LABEL: length2:
16; X64:       # %bb.0:
17; X64-NEXT:    movzwl (%rdi), %eax
18; X64-NEXT:    movzwl (%rsi), %ecx
19; X64-NEXT:    rolw $8, %ax
20; X64-NEXT:    rolw $8, %cx
21; X64-NEXT:    movzwl %ax, %eax
22; X64-NEXT:    movzwl %cx, %ecx
23; X64-NEXT:    subl %ecx, %eax
24; X64-NEXT:    retq
25  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
26  ret i32 %m
27}
28
; Equality-only 2-byte memcmp needs no byte swap; it folds to a single
; 16-bit compare plus sete.
29define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
30; X64-LABEL: length2_eq:
31; X64:       # %bb.0:
32; X64-NEXT:    movzwl (%rdi), %eax
33; X64-NEXT:    cmpw (%rsi), %ax
34; X64-NEXT:    sete %al
35; X64-NEXT:    retq
36  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
37  %c = icmp eq i32 %m, 0
38  ret i1 %c
39}
40
; 2-byte compare against bytes 1-2 of @.str ("12") folds to a cmpw against
; the immediate 0x3231.
41define i1 @length2_eq_const(ptr %X) nounwind optsize {
42; X64-LABEL: length2_eq_const:
43; X64:       # %bb.0:
44; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
45; X64-NEXT:    setne %al
46; X64-NEXT:    retq
47  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
48  %c = icmp ne i32 %m, 0
49  ret i1 %c
50}
51
; The nobuiltin call-site attribute must suppress the inline expansion:
; a real call to libc memcmp is emitted instead.
52define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize {
53; X64-LABEL: length2_eq_nobuiltin_attr:
54; X64:       # %bb.0:
55; X64-NEXT:    pushq %rax
56; X64-NEXT:    movl $2, %edx
57; X64-NEXT:    callq memcmp
58; X64-NEXT:    testl %eax, %eax
59; X64-NEXT:    sete %al
60; X64-NEXT:    popq %rcx
61; X64-NEXT:    retq
62  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
63  %c = icmp eq i32 %m, 0
64  ret i1 %c
65}
66
; 3-byte memcmp splits into a byte-swapped 2-byte compare plus a trailing-byte
; subtract; the shared res_block materializes -1/+1 via sbbl/orl $1.
67define i32 @length3(ptr %X, ptr %Y) nounwind optsize {
68; X64-LABEL: length3:
69; X64:       # %bb.0:
70; X64-NEXT:    movzwl (%rdi), %ecx
71; X64-NEXT:    movzwl (%rsi), %edx
72; X64-NEXT:    rolw $8, %cx
73; X64-NEXT:    rolw $8, %dx
74; X64-NEXT:    cmpw %dx, %cx
75; X64-NEXT:    jne .LBB4_3
76; X64-NEXT:  # %bb.1: # %loadbb1
77; X64-NEXT:    movzbl 2(%rdi), %eax
78; X64-NEXT:    movzbl 2(%rsi), %ecx
79; X64-NEXT:    subl %ecx, %eax
80; X64-NEXT:    retq
81; X64-NEXT:  .LBB4_3: # %res_block
82; X64-NEXT:    xorl %eax, %eax
83; X64-NEXT:    cmpw %dx, %cx
84; X64-NEXT:    sbbl %eax, %eax
85; X64-NEXT:    orl $1, %eax
86; X64-NEXT:    retq
87  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
88  ret i32 %m
89}
90
; Equality-only 3-byte memcmp is branchless: xor the 2-byte and 1-byte pieces
; and or the results together before one setne.
91define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
92; X64-LABEL: length3_eq:
93; X64:       # %bb.0:
94; X64-NEXT:    movzwl (%rdi), %eax
95; X64-NEXT:    xorw (%rsi), %ax
96; X64-NEXT:    movb 2(%rdi), %cl
97; X64-NEXT:    xorb 2(%rsi), %cl
98; X64-NEXT:    movzbl %cl, %ecx
99; X64-NEXT:    orw %ax, %cx
100; X64-NEXT:    setne %al
101; X64-NEXT:    retq
102  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
103  %c = icmp ne i32 %m, 0
104  ret i1 %c
105}
106
; 4-byte memcmp inlines as bswapl'ed loads with the seta/sbbb $0 idiom
; producing the signed -1/0/+1 result, sign-extended to i32.
107define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
108; X64-LABEL: length4:
109; X64:       # %bb.0:
110; X64-NEXT:    movl (%rdi), %eax
111; X64-NEXT:    movl (%rsi), %ecx
112; X64-NEXT:    bswapl %eax
113; X64-NEXT:    bswapl %ecx
114; X64-NEXT:    cmpl %ecx, %eax
115; X64-NEXT:    seta %al
116; X64-NEXT:    sbbb $0, %al
117; X64-NEXT:    movsbl %al, %eax
118; X64-NEXT:    retq
119  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
120  ret i32 %m
121}
122
; Equality-only 4-byte memcmp folds to a single 32-bit compare (no byte swap).
123define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize {
124; X64-LABEL: length4_eq:
125; X64:       # %bb.0:
126; X64-NEXT:    movl (%rdi), %eax
127; X64-NEXT:    cmpl (%rsi), %eax
128; X64-NEXT:    setne %al
129; X64-NEXT:    retq
130  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
131  %c = icmp ne i32 %m, 0
132  ret i1 %c
133}
134
; 4-byte compare against bytes 1-4 of @.str ("1234") folds to a cmpl with
; the immediate 0x34333231.
135define i1 @length4_eq_const(ptr %X) nounwind optsize {
136; X64-LABEL: length4_eq_const:
137; X64:       # %bb.0:
138; X64-NEXT:    cmpl $875770417, (%rdi) # imm = 0x34333231
139; X64-NEXT:    sete %al
140; X64-NEXT:    retq
141  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
142  %c = icmp eq i32 %m, 0
143  ret i1 %c
144}
145
; 5-byte memcmp: byte-swapped 4-byte compare, then a trailing-byte subtract;
; mismatches branch to res_block which yields -1/+1 via sbbl/orl $1.
146define i32 @length5(ptr %X, ptr %Y) nounwind optsize {
147; X64-LABEL: length5:
148; X64:       # %bb.0:
149; X64-NEXT:    movl (%rdi), %ecx
150; X64-NEXT:    movl (%rsi), %edx
151; X64-NEXT:    bswapl %ecx
152; X64-NEXT:    bswapl %edx
153; X64-NEXT:    cmpl %edx, %ecx
154; X64-NEXT:    jne .LBB9_3
155; X64-NEXT:  # %bb.1: # %loadbb1
156; X64-NEXT:    movzbl 4(%rdi), %eax
157; X64-NEXT:    movzbl 4(%rsi), %ecx
158; X64-NEXT:    subl %ecx, %eax
159; X64-NEXT:    retq
160; X64-NEXT:  .LBB9_3: # %res_block
161; X64-NEXT:    xorl %eax, %eax
162; X64-NEXT:    cmpl %edx, %ecx
163; X64-NEXT:    sbbl %eax, %eax
164; X64-NEXT:    orl $1, %eax
165; X64-NEXT:    retq
166  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
167  ret i32 %m
168}
169
; Equality-only 5-byte memcmp: branchless xorl/xorb reduction of the 4-byte
; and 1-byte pieces, or'd together before one setne.
170define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
171; X64-LABEL: length5_eq:
172; X64:       # %bb.0:
173; X64-NEXT:    movl (%rdi), %eax
174; X64-NEXT:    xorl (%rsi), %eax
175; X64-NEXT:    movb 4(%rdi), %cl
176; X64-NEXT:    xorb 4(%rsi), %cl
177; X64-NEXT:    movzbl %cl, %ecx
178; X64-NEXT:    orl %eax, %ecx
179; X64-NEXT:    setne %al
180; X64-NEXT:    retq
181  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
182  %c = icmp ne i32 %m, 0
183  ret i1 %c
184}
185
; 8-byte memcmp inlines with bswapq'ed 64-bit loads and the seta/sbbb $0
; idiom for the signed -1/0/+1 result.
186define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
187; X64-LABEL: length8:
188; X64:       # %bb.0:
189; X64-NEXT:    movq (%rdi), %rax
190; X64-NEXT:    movq (%rsi), %rcx
191; X64-NEXT:    bswapq %rax
192; X64-NEXT:    bswapq %rcx
193; X64-NEXT:    cmpq %rcx, %rax
194; X64-NEXT:    seta %al
195; X64-NEXT:    sbbb $0, %al
196; X64-NEXT:    movsbl %al, %eax
197; X64-NEXT:    retq
198  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
199  ret i32 %m
200}
201
; Equality-only 8-byte memcmp folds to a single 64-bit compare.
202define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize {
203; X64-LABEL: length8_eq:
204; X64:       # %bb.0:
205; X64-NEXT:    movq (%rdi), %rax
206; X64-NEXT:    cmpq (%rsi), %rax
207; X64-NEXT:    sete %al
208; X64-NEXT:    retq
209  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
210  %c = icmp eq i32 %m, 0
211  ret i1 %c
212}
213
; 8-byte compare against the first 8 bytes of @.str ("01234567") uses a
; movabsq immediate (little-endian 0x3736353433323130).
214define i1 @length8_eq_const(ptr %X) nounwind optsize {
215; X64-LABEL: length8_eq_const:
216; X64:       # %bb.0:
217; X64-NEXT:    movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
218; X64-NEXT:    cmpq %rax, (%rdi)
219; X64-NEXT:    setne %al
220; X64-NEXT:    retq
221  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
222  %c = icmp ne i32 %m, 0
223  ret i1 %c
224}
225
; Equality-only 12-byte memcmp: branchless xorq (low 8 bytes) + xorl
; (trailing 4 bytes) or'd together.
226define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize {
227; X64-LABEL: length12_eq:
228; X64:       # %bb.0:
229; X64-NEXT:    movq (%rdi), %rax
230; X64-NEXT:    xorq (%rsi), %rax
231; X64-NEXT:    movl 8(%rdi), %ecx
232; X64-NEXT:    xorl 8(%rsi), %ecx
233; X64-NEXT:    orq %rax, %rcx
234; X64-NEXT:    setne %al
235; X64-NEXT:    retq
236  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
237  %c = icmp ne i32 %m, 0
238  ret i1 %c
239}
240
; 12-byte ordering memcmp chains a byte-swapped 8-byte compare with a
; byte-swapped 4-byte compare, sharing one res_block for the -1/+1 result.
241define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
242; X64-LABEL: length12:
243; X64:       # %bb.0:
244; X64-NEXT:    movq (%rdi), %rcx
245; X64-NEXT:    movq (%rsi), %rdx
246; X64-NEXT:    bswapq %rcx
247; X64-NEXT:    bswapq %rdx
248; X64-NEXT:    cmpq %rdx, %rcx
249; X64-NEXT:    jne .LBB15_2
250; X64-NEXT:  # %bb.1: # %loadbb1
251; X64-NEXT:    movl 8(%rdi), %ecx
252; X64-NEXT:    movl 8(%rsi), %edx
253; X64-NEXT:    bswapl %ecx
254; X64-NEXT:    bswapl %edx
255; X64-NEXT:    xorl %eax, %eax
256; X64-NEXT:    cmpq %rdx, %rcx
257; X64-NEXT:    je .LBB15_3
258; X64-NEXT:  .LBB15_2: # %res_block
259; X64-NEXT:    xorl %eax, %eax
260; X64-NEXT:    cmpq %rdx, %rcx
261; X64-NEXT:    sbbl %eax, %eax
262; X64-NEXT:    orl $1, %eax
263; X64-NEXT:  .LBB15_3: # %endblock
264; X64-NEXT:    retq
265  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
266  ret i32 %m
267}
268
269; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
270
; PR33329 regression: 16-byte ordering memcmp expands to two chained
; bswapq'ed 8-byte compares instead of a libcall.
271define i32 @length16(ptr %X, ptr %Y) nounwind optsize {
272; X64-LABEL: length16:
273; X64:       # %bb.0:
274; X64-NEXT:    movq (%rdi), %rcx
275; X64-NEXT:    movq (%rsi), %rdx
276; X64-NEXT:    bswapq %rcx
277; X64-NEXT:    bswapq %rdx
278; X64-NEXT:    cmpq %rdx, %rcx
279; X64-NEXT:    jne .LBB16_2
280; X64-NEXT:  # %bb.1: # %loadbb1
281; X64-NEXT:    movq 8(%rdi), %rcx
282; X64-NEXT:    movq 8(%rsi), %rdx
283; X64-NEXT:    bswapq %rcx
284; X64-NEXT:    bswapq %rdx
285; X64-NEXT:    xorl %eax, %eax
286; X64-NEXT:    cmpq %rdx, %rcx
287; X64-NEXT:    je .LBB16_3
288; X64-NEXT:  .LBB16_2: # %res_block
289; X64-NEXT:    xorl %eax, %eax
290; X64-NEXT:    cmpq %rdx, %rcx
291; X64-NEXT:    sbbl %eax, %eax
292; X64-NEXT:    orl $1, %eax
293; X64-NEXT:  .LBB16_3: # %endblock
294; X64-NEXT:    retq
295  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
296  ret i32 %m
297}
298
; Equality-only 16 bytes vectorizes: SSE2 uses pcmpeqb/pmovmskb against the
; all-ones mask, AVX uses a single vpxor + vptest.
299define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize {
300; X64-SSE2-LABEL: length16_eq:
301; X64-SSE2:       # %bb.0:
302; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
303; X64-SSE2-NEXT:    movdqu (%rsi), %xmm1
304; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
305; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
306; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
307; X64-SSE2-NEXT:    setne %al
308; X64-SSE2-NEXT:    retq
309;
310; X64-AVX-LABEL: length16_eq:
311; X64-AVX:       # %bb.0:
312; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
313; X64-AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
314; X64-AVX-NEXT:    vptest %xmm0, %xmm0
315; X64-AVX-NEXT:    setne %al
316; X64-AVX-NEXT:    retq
317  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
318  %cmp = icmp ne i32 %call, 0
319  ret i1 %cmp
320}
321
; 16-byte equality compare against @.str loads the expected bytes from the
; constant pool (pcmpeqb on SSE2, vpxor + vptest on AVX).
322define i1 @length16_eq_const(ptr %X) nounwind optsize {
323; X64-SSE2-LABEL: length16_eq_const:
324; X64-SSE2:       # %bb.0:
325; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
326; X64-SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
327; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
328; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
329; X64-SSE2-NEXT:    sete %al
330; X64-SSE2-NEXT:    retq
331;
332; X64-AVX-LABEL: length16_eq_const:
333; X64-AVX:       # %bb.0:
334; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
335; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
336; X64-AVX-NEXT:    vptest %xmm0, %xmm0
337; X64-AVX-NEXT:    sete %al
338; X64-AVX-NEXT:    retq
339  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
340  %c = icmp eq i32 %m, 0
341  ret i1 %c
342}
343
344; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
345
; Under optsize, a 24-byte ordering memcmp is not expanded; it becomes a
; tail call (jmp) to libc memcmp.
346define i32 @length24(ptr %X, ptr %Y) nounwind optsize {
347; X64-LABEL: length24:
348; X64:       # %bb.0:
349; X64-NEXT:    movl $24, %edx
350; X64-NEXT:    jmp memcmp # TAILCALL
351  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
352  ret i32 %m
353}
354
; Equality-only 24 bytes: a 16-byte and an 8-byte vector compare combined
; with pand (SSE2) or vpor + vptest (AVX).
355define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize {
356; X64-SSE2-LABEL: length24_eq:
357; X64-SSE2:       # %bb.0:
358; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
359; X64-SSE2-NEXT:    movdqu (%rsi), %xmm1
360; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
361; X64-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
362; X64-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
363; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
364; X64-SSE2-NEXT:    pand %xmm1, %xmm2
365; X64-SSE2-NEXT:    pmovmskb %xmm2, %eax
366; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
367; X64-SSE2-NEXT:    sete %al
368; X64-SSE2-NEXT:    retq
369;
370; X64-AVX-LABEL: length24_eq:
371; X64-AVX:       # %bb.0:
372; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
373; X64-AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
374; X64-AVX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
375; X64-AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
376; X64-AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
377; X64-AVX-NEXT:    vpor %xmm0, %xmm1, %xmm0
378; X64-AVX-NEXT:    vptest %xmm0, %xmm0
379; X64-AVX-NEXT:    sete %al
380; X64-AVX-NEXT:    retq
381  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
382  %cmp = icmp eq i32 %call, 0
383  ret i1 %cmp
384}
385
; 24-byte constant-equality compare against @.str: both halves come from the
; constant pool and are combined via pand (SSE2) or vpor + vptest (AVX).
386define i1 @length24_eq_const(ptr %X) nounwind optsize {
387; X64-SSE2-LABEL: length24_eq_const:
388; X64-SSE2:       # %bb.0:
389; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
390; X64-SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
391; X64-SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
392; X64-SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
393; X64-SSE2-NEXT:    pand %xmm1, %xmm0
394; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
395; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
396; X64-SSE2-NEXT:    setne %al
397; X64-SSE2-NEXT:    retq
398;
399; X64-AVX-LABEL: length24_eq_const:
400; X64-AVX:       # %bb.0:
401; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
402; X64-AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
403; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
404; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
405; X64-AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
406; X64-AVX-NEXT:    vptest %xmm0, %xmm0
407; X64-AVX-NEXT:    setne %al
408; X64-AVX-NEXT:    retq
409  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
410  %c = icmp ne i32 %m, 0
411  ret i1 %c
412}
413
; Under optsize, a 32-byte ordering memcmp tail-calls libc memcmp.
414define i32 @length32(ptr %X, ptr %Y) nounwind optsize {
415; X64-LABEL: length32:
416; X64:       # %bb.0:
417; X64-NEXT:    movl $32, %edx
418; X64-NEXT:    jmp memcmp # TAILCALL
419  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
420  ret i32 %m
421}
422
423; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
424
; PR33325 regression: equality-only 32 bytes uses two xmm compares on SSE2
; and a single 32-byte ymm xor + vptest on AVX1/AVX2 (with vzeroupper before
; returning to non-VEX code).
425define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize {
426; X64-SSE2-LABEL: length32_eq:
427; X64-SSE2:       # %bb.0:
428; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
429; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
430; X64-SSE2-NEXT:    movdqu (%rsi), %xmm2
431; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
432; X64-SSE2-NEXT:    movdqu 16(%rsi), %xmm0
433; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
434; X64-SSE2-NEXT:    pand %xmm2, %xmm0
435; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
436; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
437; X64-SSE2-NEXT:    sete %al
438; X64-SSE2-NEXT:    retq
439;
440; X64-AVX1-LABEL: length32_eq:
441; X64-AVX1:       # %bb.0:
442; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
443; X64-AVX1-NEXT:    vxorps (%rsi), %ymm0, %ymm0
444; X64-AVX1-NEXT:    vptest %ymm0, %ymm0
445; X64-AVX1-NEXT:    sete %al
446; X64-AVX1-NEXT:    vzeroupper
447; X64-AVX1-NEXT:    retq
448;
449; X64-AVX2-LABEL: length32_eq:
450; X64-AVX2:       # %bb.0:
451; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
452; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
453; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
454; X64-AVX2-NEXT:    sete %al
455; X64-AVX2-NEXT:    vzeroupper
456; X64-AVX2-NEXT:    retq
457  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
458  %cmp = icmp eq i32 %call, 0
459  ret i1 %cmp
460}
461
; 32-byte constant-equality compare against @.str: expected bytes come from
; the constant pool; SSE2 splits into two xmm halves, AVX uses one ymm op.
462define i1 @length32_eq_const(ptr %X) nounwind optsize {
463; X64-SSE2-LABEL: length32_eq_const:
464; X64-SSE2:       # %bb.0:
465; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
466; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
467; X64-SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
468; X64-SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
469; X64-SSE2-NEXT:    pand %xmm1, %xmm0
470; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
471; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
472; X64-SSE2-NEXT:    setne %al
473; X64-SSE2-NEXT:    retq
474;
475; X64-AVX1-LABEL: length32_eq_const:
476; X64-AVX1:       # %bb.0:
477; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
478; X64-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
479; X64-AVX1-NEXT:    vptest %ymm0, %ymm0
480; X64-AVX1-NEXT:    setne %al
481; X64-AVX1-NEXT:    vzeroupper
482; X64-AVX1-NEXT:    retq
483;
484; X64-AVX2-LABEL: length32_eq_const:
485; X64-AVX2:       # %bb.0:
486; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
487; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
488; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
489; X64-AVX2-NEXT:    setne %al
490; X64-AVX2-NEXT:    vzeroupper
491; X64-AVX2-NEXT:    retq
492  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
493  %c = icmp ne i32 %m, 0
494  ret i1 %c
495}
496
; Under optsize, a 64-byte ordering memcmp tail-calls libc memcmp.
497define i32 @length64(ptr %X, ptr %Y) nounwind optsize {
498; X64-LABEL: length64:
499; X64:       # %bb.0:
500; X64-NEXT:    movl $64, %edx
501; X64-NEXT:    jmp memcmp # TAILCALL
502  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
503  ret i32 %m
504}
505
; Equality-only 64 bytes: SSE2 still falls back to a libcall, while AVX1/AVX2
; combine two 32-byte ymm compares with an or before vptest.
506define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize {
507; X64-SSE2-LABEL: length64_eq:
508; X64-SSE2:       # %bb.0:
509; X64-SSE2-NEXT:    pushq %rax
510; X64-SSE2-NEXT:    movl $64, %edx
511; X64-SSE2-NEXT:    callq memcmp
512; X64-SSE2-NEXT:    testl %eax, %eax
513; X64-SSE2-NEXT:    setne %al
514; X64-SSE2-NEXT:    popq %rcx
515; X64-SSE2-NEXT:    retq
516;
517; X64-AVX1-LABEL: length64_eq:
518; X64-AVX1:       # %bb.0:
519; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
520; X64-AVX1-NEXT:    vmovups 32(%rdi), %ymm1
521; X64-AVX1-NEXT:    vxorps 32(%rsi), %ymm1, %ymm1
522; X64-AVX1-NEXT:    vxorps (%rsi), %ymm0, %ymm0
523; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
524; X64-AVX1-NEXT:    vptest %ymm0, %ymm0
525; X64-AVX1-NEXT:    setne %al
526; X64-AVX1-NEXT:    vzeroupper
527; X64-AVX1-NEXT:    retq
528;
529; X64-AVX2-LABEL: length64_eq:
530; X64-AVX2:       # %bb.0:
531; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
532; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
533; X64-AVX2-NEXT:    vpxor 32(%rsi), %ymm1, %ymm1
534; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
535; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
536; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
537; X64-AVX2-NEXT:    setne %al
538; X64-AVX2-NEXT:    vzeroupper
539; X64-AVX2-NEXT:    retq
540  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
541  %cmp = icmp ne i32 %call, 0
542  ret i1 %cmp
543}
544
; 64-byte constant-equality against @.str: SSE2 calls memcmp with the global's
; address; AVX1/AVX2 xor two ymm halves with constant-pool data and or them.
545define i1 @length64_eq_const(ptr %X) nounwind optsize {
546; X64-SSE2-LABEL: length64_eq_const:
547; X64-SSE2:       # %bb.0:
548; X64-SSE2-NEXT:    pushq %rax
549; X64-SSE2-NEXT:    movl $.L.str, %esi
550; X64-SSE2-NEXT:    movl $64, %edx
551; X64-SSE2-NEXT:    callq memcmp
552; X64-SSE2-NEXT:    testl %eax, %eax
553; X64-SSE2-NEXT:    sete %al
554; X64-SSE2-NEXT:    popq %rcx
555; X64-SSE2-NEXT:    retq
556;
557; X64-AVX1-LABEL: length64_eq_const:
558; X64-AVX1:       # %bb.0:
559; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
560; X64-AVX1-NEXT:    vmovups 32(%rdi), %ymm1
561; X64-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
562; X64-AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
563; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
564; X64-AVX1-NEXT:    vptest %ymm0, %ymm0
565; X64-AVX1-NEXT:    sete %al
566; X64-AVX1-NEXT:    vzeroupper
567; X64-AVX1-NEXT:    retq
568;
569; X64-AVX2-LABEL: length64_eq_const:
570; X64-AVX2:       # %bb.0:
571; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
572; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
573; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
574; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
575; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
576; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
577; X64-AVX2-NEXT:    sete %al
578; X64-AVX2-NEXT:    vzeroupper
579; X64-AVX2-NEXT:    retq
580  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
581  %c = icmp eq i32 %m, 0
582  ret i1 %c
583}
584
; bcmp only promises a zero/nonzero result, so no byte swap or sign logic is
; needed: the i32 return is produced directly by cmpw + setne.
585define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize {
586; X64-LABEL: bcmp_length2:
587; X64:       # %bb.0:
588; X64-NEXT:    movzwl (%rdi), %ecx
589; X64-NEXT:    xorl %eax, %eax
590; X64-NEXT:    cmpw (%rsi), %cx
591; X64-NEXT:    setne %al
592; X64-NEXT:    retq
593  %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
594  ret i32 %m
595}
596