xref: /illumos-gate/usr/src/lib/libc/sparcv9/gen/memcmp.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*5d9d9091SRichard Lowe * Use is subject to license terms.
25*5d9d9091SRichard Lowe */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"memcmp.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe/*
30*5d9d9091SRichard Lowe * memcmp(s1, s2, len)
31*5d9d9091SRichard Lowe *
32*5d9d9091SRichard Lowe * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
33*5d9d9091SRichard Lowe *
34*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for memcmp
35*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
36*5d9d9091SRichard Lowe *
37*5d9d9091SRichard Lowe *	int
38*5d9d9091SRichard Lowe *	memcmp(const void *s1, const void *s2, size_t n)
39*5d9d9091SRichard Lowe *	{
40*5d9d9091SRichard Lowe *		if (s1 != s2 && n != 0) {
41*5d9d9091SRichard Lowe *			const unsigned char *ps1 = s1;
42*5d9d9091SRichard Lowe *			const unsigned char *ps2 = s2;
43*5d9d9091SRichard Lowe *			do {
44*5d9d9091SRichard Lowe *				if (*ps1++ != *ps2++)
45*5d9d9091SRichard Lowe *					return (ps1[-1] - ps2[-1]);
46*5d9d9091SRichard Lowe *			} while (--n != 0);
47*5d9d9091SRichard Lowe *		}
48*5d9d9091SRichard Lowe *		return (0);
49*5d9d9091SRichard Lowe *	}
50*5d9d9091SRichard Lowe */
51*5d9d9091SRichard Lowe
52*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
53*5d9d9091SRichard Lowe
/*
 * memcmp entry point.
 *
 * Register use, as read from the code below:
 *   %o0       s1 cursor; overwritten with the return value on exit
 *   %o1       s2 cursor; rewritten as (s2 - s1) before the indexed loads,
 *             so that [%o0 + %o1] addresses s2 while only %o0 advances
 *   %o2       byte count; after .iss2 only the tail left for .bytcmp
 *   %o3       scratch while aligning s1; after .iss2 the number of bytes
 *             covered by the word loops
 *   %o4, %o5  current data from s1 and s2 respectively
 *   %g1, %g5  scratch used to merge misaligned s2 words
 *
 * All data is fetched with ldub/lduh/lduw, so bytes are compared as
 * unsigned values.
 *
 * Many instructions sit in branch delay slots, and several branches test
 * condition codes that were set a few instructions earlier, so the order
 * of instructions here is significant.
 */
54*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memcmp,function)
55*5d9d9091SRichard Lowe
56*5d9d9091SRichard Lowe	ENTRY(memcmp)
57*5d9d9091SRichard Lowe	cmp	%o0, %o1		! s1 == s2?
58*5d9d9091SRichard Lowe	be,pn	%xcc, .cmpeq
	/* delay slot of the be above; sets up the small-count test below */
59*5d9d9091SRichard Lowe	cmp	%o2, 17
60*5d9d9091SRichard Lowe	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
	/* annulled delay slot: %o1 = s2 - s1 only when .cmpbyt is taken */
61*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1
62*5d9d9091SRichard Lowe
	/*
	 * Bring s1 up to a 4-byte boundary.  %o3 = s1 & 3: 0 goes straight
	 * to .iss2, 2 goes to .algn2, and 1 or 3 fall into .algn1.
	 */
63*5d9d9091SRichard Lowe	andcc	%o0, 3, %o3		! is s1 aligned?
64*5d9d9091SRichard Lowe	bz,a,pn	%icc, .iss2		! if so go check s2
65*5d9d9091SRichard Lowe	andcc	%o1, 3, %o4		! is s2 aligned?
66*5d9d9091SRichard Lowe	cmp	%o3, 2
67*5d9d9091SRichard Lowe	be,pn	%icc, .algn2
	/* delay slot: this result is tested by the first be in .algn1 */
68*5d9d9091SRichard Lowe	cmp	%o3, 3
69*5d9d9091SRichard Lowe
	/*
	 * Compare one byte.  If s1 & 3 was 3 this byte completes the
	 * alignment, so go to .algn3 with the byte compare done in the delay
	 * slot.  Otherwise (s1 & 3 was 1) continue into .algn2 when the
	 * bytes match.
	 */
70*5d9d9091SRichard Lowe.algn1:	ldub	[%o0], %o4		! cmp one byte
71*5d9d9091SRichard Lowe	inc	%o0
72*5d9d9091SRichard Lowe	ldub	[%o1], %o5
73*5d9d9091SRichard Lowe	inc	%o1
74*5d9d9091SRichard Lowe	dec	%o2
75*5d9d9091SRichard Lowe	be,pn	%icc, .algn3
76*5d9d9091SRichard Lowe	cmp	%o4, %o5
77*5d9d9091SRichard Lowe	be,pt	%icc, .algn2
78*5d9d9091SRichard Lowe	nop
79*5d9d9091SRichard Lowe	b,a	.noteq
80*5d9d9091SRichard Lowe
	/*
	 * Compare two bytes: one halfword load from s1 against two single
	 * byte loads from s2.  %o3 receives the high byte of the halfword;
	 * the low byte is masked out of %o4 after label 1.
	 */
81*5d9d9091SRichard Lowe.algn2:	lduh	[%o0], %o4
82*5d9d9091SRichard Lowe	inc	2, %o0
83*5d9d9091SRichard Lowe	ldub	[%o1], %o5
84*5d9d9091SRichard Lowe	inc	1, %o1
85*5d9d9091SRichard Lowe	srl	%o4, 8, %o3
86*5d9d9091SRichard Lowe	cmp	%o3, %o5
87*5d9d9091SRichard Lowe	be,a,pt	%icc, 1f
88*5d9d9091SRichard Lowe	ldub	[%o1], %o5		! delay slot, get next byte from s2
89*5d9d9091SRichard Lowe	b	.noteq
90*5d9d9091SRichard Lowe	mov	%o3, %o4		! delay slot, move *s1 to %o4
91*5d9d9091SRichard Lowe1:	inc	%o1
92*5d9d9091SRichard Lowe	dec	2, %o2
93*5d9d9091SRichard Lowe	and	%o4, 0xff, %o4
94*5d9d9091SRichard Lowe	cmp	%o4, %o5
	/*
	 * .algn3 tests the byte compare that immediately precedes it on
	 * either path: the cmp above, or the cmp in the .algn1 delay slot.
	 */
95*5d9d9091SRichard Lowe.algn3:	be,a,pt	%icc, .iss2
96*5d9d9091SRichard Lowe	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
97*5d9d9091SRichard Lowe	b,a	.noteq
98*5d9d9091SRichard Lowe
	/*
	 * Byte loop.  %o1 holds (s2 - s1) on every path that reaches here, so
	 * [%o0 + %o1] reads s2.  Each deccc %o2 feeds the bgeu at .bytcmp:
	 * the loop body at 1 runs while the decrement does not borrow, and
	 * once it borrows control falls through into .cmpeq.  The s1 byte is
	 * loaded in the annulled delay slot of that bgeu.
	 */
99*5d9d9091SRichard Lowe.cmpbyt:b	.bytcmp
100*5d9d9091SRichard Lowe	deccc	%o2
101*5d9d9091SRichard Lowe1:	ldub	[%o0 + %o1], %o5	! byte compare loop
102*5d9d9091SRichard Lowe	inc	%o0
103*5d9d9091SRichard Lowe	cmp	%o4, %o5
104*5d9d9091SRichard Lowe	be,a,pt	%icc, .bytcmp
105*5d9d9091SRichard Lowe	deccc	%o2			! delay slot, compare count (len)
106*5d9d9091SRichard Lowe	b,a	.noteq
107*5d9d9091SRichard Lowe.bytcmp:bgeu,a,pt %xcc, 1b
108*5d9d9091SRichard Lowe	ldub	[%o0], %o4
109*5d9d9091SRichard Lowe.cmpeq:
110*5d9d9091SRichard Lowe	retl				! strings compare equal
111*5d9d9091SRichard Lowe	clr	%o0
112*5d9d9091SRichard Lowe
	/*
	 * %o4 (s1 word) and %o5 (s2 word) differ.  Walk the bytes from the
	 * most significant end, shifting both words left by 8 per step, and
	 * return the difference of the first unequal byte pair.  The sll of
	 * %o4 sits in each bne delay slot; it does not touch %o1/%o2, which
	 * already hold the byte pair being returned.  The fourth pair is
	 * extracted and subtracted without a further compare.
	 */
113*5d9d9091SRichard Lowe.noteq_word:				! words aren't equal. find unequal byte
114*5d9d9091SRichard Lowe	srl	%o4, 24, %o1		! first byte
115*5d9d9091SRichard Lowe	srl	%o5, 24, %o2
116*5d9d9091SRichard Lowe	cmp	%o1, %o2
117*5d9d9091SRichard Lowe	bne,pn	%icc, 1f
118*5d9d9091SRichard Lowe	sll	%o4, 8, %o4
119*5d9d9091SRichard Lowe	sll	%o5, 8, %o5
120*5d9d9091SRichard Lowe	srl	%o4, 24, %o1
121*5d9d9091SRichard Lowe	srl	%o5, 24, %o2
122*5d9d9091SRichard Lowe	cmp	%o1, %o2
123*5d9d9091SRichard Lowe	bne,pn	%icc, 1f
124*5d9d9091SRichard Lowe	sll	%o4, 8, %o4
125*5d9d9091SRichard Lowe	sll	%o5, 8, %o5
126*5d9d9091SRichard Lowe	srl	%o4, 24, %o1
127*5d9d9091SRichard Lowe	srl	%o5, 24, %o2
128*5d9d9091SRichard Lowe	cmp	%o1, %o2
129*5d9d9091SRichard Lowe	bne,pn	%icc, 1f
130*5d9d9091SRichard Lowe	sll	%o4, 8, %o4
131*5d9d9091SRichard Lowe	sll	%o5, 8, %o5
132*5d9d9091SRichard Lowe	srl	%o4, 24, %o1
133*5d9d9091SRichard Lowe	srl	%o5, 24, %o2
134*5d9d9091SRichard Lowe1:
135*5d9d9091SRichard Lowe	retl
136*5d9d9091SRichard Lowe	sub	%o1, %o2, %o0		! delay slot
137*5d9d9091SRichard Lowe
	/* Byte-level mismatch: %o4 and %o5 hold the differing s1/s2 bytes. */
138*5d9d9091SRichard Lowe.noteq:
139*5d9d9091SRichard Lowe	retl				! strings aren't equal
140*5d9d9091SRichard Lowe	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)
141*5d9d9091SRichard Lowe
	/*
	 * s1 is word aligned.  Every branch to .iss2 carries
	 * "andcc %o1, 3, %o4" in its delay slot, so %o4 = s2 & 3 and the
	 * condition codes reflect it; andn/and leave them intact for the bz
	 * below.  Split the count into whole words (%o3) and a tail (%o2),
	 * then dispatch on the alignment of s2.
	 */
142*5d9d9091SRichard Lowe.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
143*5d9d9091SRichard Lowe	and	%o2, 3, %o2		! remaining bytes
144*5d9d9091SRichard Lowe	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
145*5d9d9091SRichard Lowe	cmp	%o4, 2
146*5d9d9091SRichard Lowe	be,pn	%icc, .w2cmp		! s2 half aligned
	/* delay slot: this result is tested by the be in .w3cmp */
147*5d9d9091SRichard Lowe	cmp	%o4, 1
148*5d9d9091SRichard Lowe
	/*
	 * s2 & 3 is 1 or 3.  Move four bytes of the count from the word loop
	 * (%o3) to the byte tail (%o2) and preload one s2 byte into the top
	 * byte of %o5.  dec/inc do not set condition codes, so the be below
	 * still sees "cmp %o4, 1": s2 & 3 == 1 continues in .w1cmp, while
	 * s2 & 3 == 3 falls through into loop 2.
	 */
149*5d9d9091SRichard Lowe.w3cmp:
150*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond the last byte
151*5d9d9091SRichard Lowe	inc	4, %o2
152*5d9d9091SRichard Lowe	ldub	[%o1], %g1		! read a byte to align for word reads
153*5d9d9091SRichard Lowe	inc	1, %o1
154*5d9d9091SRichard Lowe	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
155*5d9d9091SRichard Lowe	sll	%g1, 24, %o5
156*5d9d9091SRichard Lowe
	/*
	 * Loop 2: each s2 word supplies its top three bytes to complete the
	 * pending byte in %o5; its last byte is carried into the next
	 * iteration by the sll in the bnz delay slot.
	 */
157*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1
158*5d9d9091SRichard Lowe2:	lduw	[%o0 + %o1], %g1
159*5d9d9091SRichard Lowe	lduw	[%o0], %o4
160*5d9d9091SRichard Lowe	inc	4, %o0
161*5d9d9091SRichard Lowe	srl	%g1, 8, %g5		! merge with the other half
162*5d9d9091SRichard Lowe	or	%g5, %o5, %o5
163*5d9d9091SRichard Lowe	cmp	%o4, %o5
164*5d9d9091SRichard Lowe	bne,pt	%icc, .noteq_word
165*5d9d9091SRichard Lowe	deccc	4, %o3
166*5d9d9091SRichard Lowe	bnz,pt	%xcc, 2b
167*5d9d9091SRichard Lowe	sll	%g1, 24, %o5
	/* back the s2 offset up so .bytcmp re-reads the unused byte */
168*5d9d9091SRichard Lowe	sub	%o1, 1, %o1		! used 3 bytes of the last word read
169*5d9d9091SRichard Lowe	b	.bytcmp
170*5d9d9091SRichard Lowe	deccc	%o2
171*5d9d9091SRichard Lowe
	/*
	 * s2 & 3 was 1.  The only branch to this label in this file is the
	 * one in .w3cmp, so one s2 byte is already in the top of %o5 and the
	 * counts have been adjusted once; they are adjusted again here.  The
	 * next two s2 bytes are merged in so %o5 holds three pending bytes.
	 */
172*5d9d9091SRichard Lowe.w1cmp:
173*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond the last byte
174*5d9d9091SRichard Lowe	inc	4, %o2
175*5d9d9091SRichard Lowe	lduh	[%o1], %g1		! read 3 bytes to word align
176*5d9d9091SRichard Lowe	inc	2, %o1
177*5d9d9091SRichard Lowe	sll	%g1, 8, %g5
178*5d9d9091SRichard Lowe	or	%o5, %g5, %o5
179*5d9d9091SRichard Lowe
	/*
	 * Loop 3: each s2 word supplies only its top byte to complete %o5;
	 * the other three bytes are carried forward by the sll in the bnz
	 * delay slot.
	 */
180*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1
181*5d9d9091SRichard Lowe3:	lduw	[%o0 + %o1], %g1
182*5d9d9091SRichard Lowe	lduw	[%o0], %o4
183*5d9d9091SRichard Lowe	inc	4, %o0
184*5d9d9091SRichard Lowe	srl	%g1, 24, %g5		! merge with the other half
185*5d9d9091SRichard Lowe	or	%g5, %o5, %o5
186*5d9d9091SRichard Lowe	cmp	%o4, %o5
187*5d9d9091SRichard Lowe	bne,pt	%icc, .noteq_word
188*5d9d9091SRichard Lowe	deccc	4, %o3
189*5d9d9091SRichard Lowe	bnz,pt	%xcc, 3b
190*5d9d9091SRichard Lowe	sll	%g1, 8, %o5
	/* back the s2 offset up so .bytcmp re-reads the three unused bytes */
191*5d9d9091SRichard Lowe	sub	%o1, 3, %o1		! used 1 byte of the last word read
192*5d9d9091SRichard Lowe	b	.bytcmp
193*5d9d9091SRichard Lowe	deccc	%o2
194*5d9d9091SRichard Lowe
	/*
	 * s2 & 3 == 2.  Move four bytes of the count to the byte tail and
	 * preload one s2 halfword into the upper half of %o5.  In loop 4 each
	 * s2 word supplies its upper half to complete %o5, and its lower half
	 * is carried forward by the sll in the bnz delay slot.
	 */
195*5d9d9091SRichard Lowe.w2cmp:
196*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond the last byte
197*5d9d9091SRichard Lowe	inc	4, %o2
198*5d9d9091SRichard Lowe	lduh	[%o1], %g1		! read a halfword to align s2
199*5d9d9091SRichard Lowe	inc	2, %o1
200*5d9d9091SRichard Lowe	sll	%g1, 16, %o5
201*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1
202*5d9d9091SRichard Lowe4:	lduw	[%o0 + %o1], %g1	! read a word from s2
203*5d9d9091SRichard Lowe	lduw	[%o0], %o4		! read a word from s1
204*5d9d9091SRichard Lowe	inc	4, %o0
205*5d9d9091SRichard Lowe	srl	%g1, 16, %g5		! merge with the other half
206*5d9d9091SRichard Lowe	or	%g5, %o5, %o5
207*5d9d9091SRichard Lowe	cmp	%o4, %o5
208*5d9d9091SRichard Lowe	bne,pn	%icc, .noteq_word
209*5d9d9091SRichard Lowe	deccc	4, %o3
210*5d9d9091SRichard Lowe	bnz,pt	%xcc, 4b
211*5d9d9091SRichard Lowe	sll	%g1, 16, %o5
	/* back the s2 offset up so .bytcmp re-reads the unused halfword */
212*5d9d9091SRichard Lowe	sub	%o1, 2, %o1		! only used half of the last read word
213*5d9d9091SRichard Lowe	b	.bytcmp
214*5d9d9091SRichard Lowe	deccc	%o2
215*5d9d9091SRichard Lowe
	/*
	 * s1 and s2 are both word aligned.  The first s2 word is loaded ahead
	 * of the loop; later ones are loaded in the annulled delay slot of
	 * the bnz, i.e. only when another iteration follows.
	 */
216*5d9d9091SRichard Lowe.w4cmp:
217*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1
218*5d9d9091SRichard Lowe	lduw	[%o0 + %o1], %o5
219*5d9d9091SRichard Lowe5:	lduw	[%o0], %o4
220*5d9d9091SRichard Lowe	inc	4, %o0
221*5d9d9091SRichard Lowe	cmp	%o4, %o5
222*5d9d9091SRichard Lowe	bne,pt	%icc, .noteq_word
223*5d9d9091SRichard Lowe	deccc	4, %o3
224*5d9d9091SRichard Lowe	bnz,a,pt %xcc, 5b
225*5d9d9091SRichard Lowe	lduw	[%o0 + %o1], %o5
226*5d9d9091SRichard Lowe	b	.bytcmp			! compare remaining bytes, if any
227*5d9d9091SRichard Lowe	deccc	%o2
228*5d9d9091SRichard Lowe
229*5d9d9091SRichard Lowe	SET_SIZE(memcmp)
230