xref: /netbsd-src/common/lib/libc/arch/powerpc/string/memcmp.S (revision 9137531a8c8228b2521c1e62169267adec0da379)
/* $NetBSD: memcmp.S,v 1.4 2013/07/04 20:57:59 matt Exp $ */

/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
 * ==========================================================================
 * Optimized memcmp implementation for IBM PowerPC 405/440.
 *
 *	Copyright (c) 2003, IBM Corporation
 *	All rights reserved.
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	* Redistributions of source code must retain the above
 *	copyright notice, this list of conditions and the following
 *	disclaimer.
 *	* Redistributions in binary form must reproduce the above
 *	copyright notice, this list of conditions and the following
 *	disclaimer in the documentation and/or other materials
 *	provided with the distribution.
 *	* Neither the name of IBM nor the names of its contributors
 *	may be used to endorse or promote products derived from this
 *	software without specific prior written permission.
 *
 *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *	CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *	MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 *	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 *	OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 *	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 *	OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 *	USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ==========================================================================
 *
 * Function: Compare the first n bytes of two memory buffers
 *
 *		int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Input:	r3 - buffer 1 address
 *	 	r4 - buffer 2 address
 *	 	r5 - number of bytes to compare
 * Output: r3 <0 (less), 0 (equal), >0 (greater)
 *
 * ==========================================================================
 */

#include <machine/asm.h>

	.text
	.align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:	r3 = s1 (buffer 1), r4 = s2 (buffer 2), r5 = n (byte count)
 * Out:	r3 = 0 if the first n bytes are equal,
 *	    -1 if buffer 1 sorts before buffer 2,
 *	    +1 if buffer 1 sorts after buffer 2.
 *
 * Register roles inside the routine:
 *	r4	buffer 2 cursor
 *	r11	buffer 1 cursor (copy of r3, which becomes the result)
 *	r5	bytes still to be compared
 *	r6, r7	first / second word loaded from buffer 1
 *	r8, r9	first / second word loaded from buffer 2
 *	r10,r12	scratch: dwords left before a 4 KiB boundary, loop count
 *	ctr	loop counter (dword loop and byte loop)
 *
 * Writes r3-r12, CTR, CR0 and XER[CA] (subfic); no stack is used.
 * These are volatile registers in the SVR4 PowerPC ABI.
 *
 * Word loads are only issued while both cursors are at least 8 bytes
 * away from the end of their 4 KiB page, so a load may read a few bytes
 * past n but never touches the following page.  Within 8 bytes of a
 * page boundary the comparison is done one byte at a time.
 *
 * Results are derived from unsigned word compares, so the routine
 * depends on big-endian byte order (first byte in memory = most
 * significant byte of the loaded word).
 *
 * The count is a size_t: every test against r5 is an unsigned compare
 * (cmplw/cmplwi).  A signed compare would treat n >= 2^31 as negative
 * and wrongly take the short-compare paths.
 * NOTE(review): the count is handled with 32-bit word operations;
 * presumably this file is only built for 32-bit targets -- confirm.
 */
ENTRY(memcmp)

	/* n == 0: nothing to compare, the buffers are equal. */
	cmplwi	%r5, 0
	beq-	.Lret_0

	/*
	 * Most of the time the difference is found in the first
	 * several bytes.  The following code minimizes the number
	 * of load operations for short compares.
	 */
	mr	%r11, %r3		/* r11 = buffer 1 cursor	*/

.Lagain:
	/* (~addr >> 3) & 0x1ff = whole dwords left in the 4 KiB page. */
	not	%r10, %r4
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy	*/
	beq-	.Lbytebybyte		/* < 8 bytes left in the page	*/
	lwz	%r8, 0(%r4)		/* 1st buffer 2 word		*/

	not	%r12, %r11
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy	*/
	beq-	.Lbytebybyte		/* < 8 bytes left in the page	*/
	lwz	%r6, 0(%r11)		/* 1st buffer 1 word		*/

	cmplwi	%r5, 4			/* 1..4 bytes left: the result	*/
	ble+	.Lfirst4		/* is in the first word pair	*/

	cmplw	%r8, %r6		/* buffer 2 word vs buffer 1	*/
	bne+	.Lall_done

	lwzu	%r9, 4(%r4)		/* 2nd buffer 2 word, r4 += 4	*/
	lwzu	%r7, 4(%r11)		/* 2nd buffer 1 word, r11 += 4	*/

	cmplwi	%r5, 8			/* 5..8 bytes left: the result	*/
	ble+	.Llast4			/* is in the second word pair	*/

	cmplw	%r9, %r7
	bne+	.Lall_done

	/*
	 * The first 8 bytes matched and more remain.  Both cursors
	 * point at the second word, so the next unread byte is at +4.
	 */
	addi	%r5, %r5, -8		/* 8 bytes consumed		*/
	addi	%r10, %r4, 4
	not	%r10, %r10
	rlwinm	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy	*/
	addi	%r12, %r11, 4
	not	%r12, %r12
	rlwinm	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy	*/

	/*
	 * Loop count = min(dwords to page boundary of buffer 2,
	 *		    dwords to page boundary of buffer 1,
	 *		    remaining bytes / 8).
	 */
	cmplw	%r10, %r12
	blt	.Lmin_page
	mr	%r10, %r12
.Lmin_page:
	srwi	%r12, %r5, 3		/* whole dwords left to compare	*/
	cmplw	%r10, %r12
	blt	.Lmin_count
	mr	%r10, %r12
.Lmin_count:
	cmplwi	%r10, 0
	bne	.Ldword_setup

	/* No whole dword can be done: step past the word and retry. */
	addi	%r4, %r4, 4
	addi	%r11, %r11, 4
	b	.Lagain

.Ldword_setup:
	mtctr	%r10			/* dword count for loop		*/
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word	*/
	b	.Lloop_entry

	/*
	 * Main loop, software pipelined: the four words loaded at
	 * .Lloop_entry are compared on the next pass through .Lloop;
	 * the last set is compared after the loop exits.
	 */
.Lloop:
	cmplw	%r8, %r6		/* first word pair		*/
	bne-	.Lall_done
	cmplw	%r9, %r7		/* second word pair		*/
	lwzu	%r6, 4(%r11)		/* pre-load; CR0 is untouched	*/
	bne-	.Lall_done

.Lloop_entry:
	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word	*/
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word	*/
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word	*/
	bdnz+	.Lloop			/* more dwords while --ctr != 0	*/

	slwi	%r10, %r10, 3		/* dwords handled -> bytes	*/
	subf	%r5, %r10, %r5		/* adjust byte counter		*/

	cmplw	%r8, %r6		/* compare the last dword	*/
	addi	%r4, %r4, 4		/* r4 -> next unread byte	*/
	bne-	.Lall_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4		/* r11 -> next unread byte	*/
	bne-	.Lall_done

.Lbytebybyte:
	/*
	 * Close to a page boundary (or just out of the dword loop):
	 * compare min(8, remaining) bytes one at a time, then go back
	 * to the word compare code.
	 */
	li	%r3, 8
	cmplw	%r3, %r5		/* take min(8, counter)		*/
	ble	.Lbyte_setup
	mr.	%r3, %r5
	beqlr				/* nothing left: return r3 = 0	*/

.Lbyte_setup:
	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust counter		*/

.Lbyte_loop:
	lbz	%r6, 0(%r11)		/* buffer 1 byte		*/
	addi	%r11, %r11, 1
	lbz	%r8, 0(%r4)		/* buffer 2 byte		*/
	addi	%r4, %r4, 1
	cmplw	%r8, %r6
	bdnzt	eq, .Lbyte_loop		/* loop while equal and --ctr	*/

	bne	.Lall_done

	cmplwi	%r5, 0
	bgt	.Lagain			/* handle the rest		*/

	li	%r3, 0			/* everything matched		*/
	blr

.Lfirst4:
	/*
	 * 1..4 bytes remain and they are all in r6/r8.  Shift the
	 * (4 - count) trailing bytes out so only valid bytes compare.
	 */
	subfic	%r3, %r5, 4		/* bytes to discard (0..3)	*/
	slwi	%r3, %r3, 3		/* ... as a bit count		*/
	srw	%r6, %r6, %r3		/* align 1st buffer 1 word	*/
	srw	%r8, %r8, %r3		/* align 1st buffer 2 word	*/

	cmplw	%r8, %r6		/* get result			*/
	bne	.Lall_done
	li	%r3, 0
	blr

.Llast4:
	/*
	 * 5..8 bytes remain, the first word pair matched, and the
	 * rest is in r7/r9.  Discard the (8 - count) trailing bytes.
	 */
	subfic	%r10, %r5, 8		/* bytes to discard (0..3)	*/
	slwi	%r10, %r10, 3		/* ... as a bit count		*/
	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word	*/
	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word	*/

	cmplw	%r9, %r7		/* get result			*/
	bne	.Lall_done
.Lret_0:
	li	%r3, 0			/* Equal result			*/
	blr

.Lall_done:
	/*
	 * CR0 holds an unsigned compare of (buffer 2, buffer 1) that
	 * came out not-equal.  "lt" therefore means buffer 2 < buffer 1,
	 * i.e. buffer 1 is the greater one.
	 */
	blt	.Lbuf1_greater

	li	%r3, -1			/* buffer 1 < buffer 2		*/
	blr

.Lbuf1_greater:
	li	%r3, 1			/* buffer 1 > buffer 2		*/
	blr
END(memcmp)