xref: /netbsd-src/common/lib/libc/arch/powerpc/string/memcmp.S (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1/* $NetBSD: memcmp.S,v 1.2 2008/03/06 21:17:17 phx Exp $ */
2
3/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
4 * ==========================================================================
5 * Optimized memcmp implementation for IBM PowerPC 405/440.
6 *
7 *	Copyright (c) 2003, IBM Corporation
8 *	All rights reserved.
9 *
10 *	Redistribution and use in source and binary forms, with or
11 *	without modification, are permitted provided that the following
12 *	conditions are met:
13 *
14 *	* Redistributions of source code must retain the above
15 *	copyright notice, this list of conditions and the following
16 *	disclaimer.
17 *	* Redistributions in binary form must reproduce the above
18 *	copyright notice, this list of conditions and the following
19 *	disclaimer in the documentation and/or other materials
20 *	provided with the distribution.
21 *	* Neither the name of IBM nor the names of its contributors
22 *	may be used to endorse or promote products derived from this
23 *	software without specific prior written permission.
24 *
25 *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
26 *	CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
27 *	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 *	MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 *	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
30 *	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31 *	OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 *	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 *	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 *	OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 *	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36 *	USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 *
38 * ==========================================================================
39 *
40 * Function: Compare two memory buffers (first n bytes)
41 *
42 *		int memcmp(const void *s1, const void *s2, size_t n)
43 *
44 * Input:	r3 - buffer 1 address
45 *	 	r4 - buffer 2 address
46 *	 	r5 - number of bytes to compare
47 * Output: r3 <0 (less), 0 (equal), >0 (greater)
48 *
49 * ==========================================================================
50 */
51
52#define _NOREGNAMES
53#include <machine/asm.h>
54
55        .text
56        .align 4
57/* LINTSTUB: Func: void *memcmp(const void *, const void *, size_t) */
58ENTRY(memcmp)
59
60	/*
61	 * Check count passed in R5. If zero, return 0; otherwise continue.
62	 */
63	cmpwi	%r5,0
64	beq-	ret_0;
65
66	/*
67	 * Most of the time the difference is found in the first
68	 * several bytes.  The following code minimizes the number
69	 * of load operations for short compares.
70	 */
71
72	mr	%r11, %r3		/* Save buffer 1		*/
73
74again:
75
76	not	%r10, %r4		/* buffer 2: bytes to page bdy	*/
77	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy	*/
78	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
79					/* do byte by byte		*/
80	lwz	%r8, 0(%r4)		/* load 1st buffer 2 word	*/
81
82	not	%r12, %r11		/* buffer 1: bytes to page bdy	*/
83	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy	*/
84	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
85					/* do byte by byte		*/
86	lwz	%r6, 0(%r11)		/* load 1st buffer 1 word	*/
87
88	cmpwi	%r5, 4			/* If remaining count <= 4	*/
89	ble+	first4			/* handle specially.	DWG	*/
90
91	cmplw	%r8, %r6		/* compare buffer 2 and buffer 1*/
92	bne+	all_done		/* different => we're done	*/
93
94	lwzu	%r9, 4(%r4)		/* load 2nd buffer 2 word	*/
95	lwzu	%r7, 4(%r11)		/* load 2nd buffer 1 word	*/
96
97	cmpwi	%r5, 8			/* If remaining count <= 8	*/
98	ble+	last4			/* handle specially.	DWG	*/
99
100	cmplw	%r9, %r7		/* compare buffer 2 and buffer 1*/
101	bne+	all_done		/* different => we're done	*/
102
103	addi	%r5, %r5, -8		/* Update character counter DWG */
104	addi	%r10, %r4, 0x0004	/* DWG*/
105	not	%r10, %r10		/* buffer 2: bytes to page bdy DWG */
106	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy	DWG */
107	addi	%r12, %r11, 0x0004	/* DWG */
108	not	%r12, %r12		/* buffer 1: bytes to page bdy DWG */
109	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy	DWG */
110
111	/* The following section prior to loop: figures out whether	*/
112	/* the buffer 1 or buffer 2 is closer to the page boundary.	*/
113	/* The main loop count is then set up to reflect the number of	*/
114	/* double words of the buffer that is closest			*/
115
116	cmpw	%r10, %r12		/* Find closest			*/
117	blt	lt
118
119	mr	%r10, %r12
120
121lt:
122
123	srwi	%r12, %r5, 3		/* Double check the total count */
124	cmpw	%r10, %r12		/* limitation			*/
125	blt	lt2
126
127	mr	%r10, %r12		/* DWG */
128lt2:					/* DWG */
129	cmpwi	%r10, 0			/* DWG */
130	bne	lt3			/* DWG */
131	addi	%r4, %r4, 0x0004	/* DWG */
132	addi	%r11,%r11,0x0004	/* DWG */
133	b	again			/* DWG */
134lt3:					/* DWG */
135	mtctr	%r10			/* dword count for loop		*/
136	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word	*/
137
138	b	in			/* To the loop			*/
139
140loop:					/* main loop			*/
141
142	cmplw	%r8, %r6		/* Compare first buffer 2 word	*/
143	bne-	all_done		/* with first buffer 1 word	*/
144					/* If different, we're done	*/
145	cmplw	%r9, %r7		/* Compare second buffer 2 word	*/
146					/* with second buffer 1 word	*/
147	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word	*/
148
149	bne-	all_done		/* If different, we're done	*/
150
151in:
152
153	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word	*/
154	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word	*/
155	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word	*/
156
157	bdnz+	loop			/* Do more DW's if cnt > 0	*/
158
159	/*mfctr	%r12*/ /*DWG*/		/* number of dwords left	*/
160	/*subf	%r10, %r12, %r10*/ /*DWG*//* number of dwords compared	*/
161	slwi	%r10, %r10, 3
162	subf	%r5, %r10, %r5		/* adjust byte counter		*/
163	/*bne+	partial*/ /*DWG*/	/* If less than 8 bytes, handle */
164					/* specially			*/
165	/*cmpwi	%r5, 8*/		/* Removed.		 DWG */
166	/*blt	partial*/		/* Removed.		 DWG */
167
168	/*addic	%r5, %r5, -8*/ /*DWG*/	/* Subtract two words from count*/
169
170	cmplw	%r8, %r6		/* compare last dword		*/
171	addi	%r4, %r4, 4
172	bne-	all_done
173
174	cmplw	%r9, %r7
175	addi	%r11, %r11, 4
176	bne-	all_done
177
178bytebybyte:
179
180	/* We've gotten close to a page boundary: do a byte-byte-byte
181	 * compare for the following 8 bytes, and then go back to
182	 * the full-word compare loop.
183	 */
184
185	li	%r3, 8			/* loop count			*/
186	cmpw	%r3, %r5		/* take min(8, counter)		*/
187	ble	f2
188
189	mr.	%r3, %r5
190
191	beqlr
192
193f2:
194
195	mtctr	%r3
196	subf	%r5, %r3, %r5		/* adjust counter		*/
197
198bbb:
199
200	lbz	%r6, 0(%r11)		/* byte copy loop		*/
201
202	addi	%r11, %r11, 1
203
204	lbz	%r8, 0(%r4)
205
206	addi	%r4, %r4, 1
207
208	cmplw	%r8, %r6
209
210	bdnzt+	eq, bbb
211
212	bne	all_done
213
214	cmpwi	%r5, 0
215	bgt	again			/* handle the rest		*/
216
217	xor	%r3,%r3,%r3
218
219	blr
220
221#if 0 /* Removed code section. DWG */
222partial:
223
224	mr.	%r3, %r5
225
226	beqlr				/* If count -> 0, we're done	*/
227
228f1:
229
230	subfic	%r3, %r3, 4		/* zero/end in first word?	*/
231	cmpwi	%r3, 0
232	blt	last4
233#endif /* DWG */
234
235first4:
236	subfic	%r3, %r5, 4		/* If count <= 4, handle 	*/
237	rlwinm	%r3, %r3, 3, 0, 31	/* count *= 8			*/
238	srw	%r6, %r6, %r3		/* align 1st buffer 1 word	*/
239	srw	%r8, %r8, %r3		/* align 1st buffer 2 word	*/
240
241	cmplw	%r8, %r6		/* get result			*/
242	bne	all_done
243	xor	%r3,%r3,%r3
244	blr
245
246last4:
247	subfic	%r10, %r5, 8		/*DWG*/
248	rlwinm	%r10, %r10, 3, 0, 31	/* count *= 8			*/
249	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word	*/
250	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word	*/
251
252	cmplw	%r9, %r7		/* get result			*/
253	bne	all_done
254ret_0:
255	xor	%r3,%r3,%r3		/* Equal result		 */
256	blr
257
258all_done:
259
260	blt	finish_lt
261
262	addi	%r3,0,-1		/* Less than result		*/
263
264	blr
265
266finish_lt:
267
268	addi	%r3,0,1			/* Greater than result		*/
269
270	blr
271