xref: /onnv-gate/usr/src/lib/libc/sparc/gen/strcpy.s (revision 7298:b69e27387f74)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
260Sstevel@tonic-gate
27*7298SMark.J.Nelson@Sun.COM	.file	"strcpy.s"
280Sstevel@tonic-gate
/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1.  s1 must be large enough to hold s2, including
 * its terminating null byte.  Return s1.
 *
 * Fast assembler language version of the following C-program strcpy
 * which represents the `standard' for the C-library.
 *
 *	char *
 *	strcpy(s1, s2)
 *	register char *s1;
 *	register const char *s2;
 *	{
 *		char *os1 = s1;
 *
 *		while(*s1++ = *s2++)
 *			;
 *		return(os1);
 *	}
 *
 */
500Sstevel@tonic-gate
#include <sys/asm_linkage.h>
520Sstevel@tonic-gate
/*
 * This is a 32-bit implementation of strcpy.
 *
 * The source pointer is checked first; if it is not word aligned, one
 * byte and/or one half-word is copied until it is.  From then on the
 * source is read a word at a time and every word is tested for a zero
 * byte.  How a word is stored depends on the destination alignment:
 * as a whole word (.copyword), as two half-words (.storehalfword), or
 * as byte, half-word, byte (.storebyte).  The word that contains the
 * zero byte is always finished one byte at a time (.zerobyte).
 *
 * Register usage:
 *	%o0	dst as passed in; never written, so it is also the
 *		return value
 *	%o1	src on entry; afterwards the data most recently loaded
 *		(briefly dst & 3 while the store loop is selected)
 *	%o2	running dst pointer
 *	%o3	src - dst, so that [%o2 + %o3] is the src location that
 *		corresponds to dst location %o2 (scratch in .zerobyte)
 *	%o4	src & 3 while aligning, then 0x01010101 (byte masks in
 *		.zerobyte)
 *	%o5	0x80808080 (a byte mask in .zerobyte)
 *	%g1	scratch
 *
 * Bytes are numbered in memory order; the first byte of a loaded word
 * is taken from bits 31:24.
 */

	ENTRY(strcpy)

	.align 32

	sub	%o1, %o0, %o3		! %o3 = src - dst
	andcc	%o1, 3, %o4		! %o4 = src & 3; src word aligned ?
	bz	.srcaligned		! yup
	mov	%o0, %o2		! (delay) %o2 = running dst

	! src & 3 is 1, 2 or 3 here.
	cmp	%o4, 2			! src halfword aligned
	be	.s2aligned		! yup
	ldub	[%o2 + %o3], %o1	! (delay) src[0], unused when branching
	tst	%o1			! byte zero?
	stb	%o1, [%o2]		! store first byte
	bz	.done			! yup, done (tests the tst result)
	cmp	%o4, 3			! (delay) only one byte needed to align?
	bz	.srcaligned		! yup
	inc	%o2			! (delay) src++, dst++

	! src is half-word but not word aligned: copy two more bytes.
.s2aligned:
	lduh	[%o2 + %o3], %o1	! src[]
	srl	%o1, 8, %o4		! %o4<7:0> = first byte
	tst	%o4			! first byte zero ?
	bz	.done			! yup, done
	stb	%o4, [%o2]		! (delay) store first byte
	andcc	%o1, 0xff, %g0		! second byte zero ?
	bz	.done			! yup, done
	stb	%o1, [%o2 + 1]		! (delay) store second byte
	add	%o2, 2, %o2		! src += 2, dst += 2

	! src is word aligned from here on.  Build the two constants for
	! the zero-byte test and pick a store loop by dst alignment.
.srcaligned:
	sethi	%hi(0x01010101), %o4	! magic1 (Alan Mycroft)
	sethi	%hi(0x80808080), %o5	! magic2 (Alan Mycroft)
	or	%o4, %lo(0x01010101), %o4
	andcc	%o2, 3, %o1		! destination word aligned?
	bnz	.dstnotaligned		! nope
	or	%o5, %lo(0x80808080), %o5	! (delay) finish magic2

	! dst is word aligned.  (word - magic1) & ~word & magic2 is
	! non-zero when the word contains a zero byte.  The st sits in
	! the delay slot of an annulling branch, so it is executed only
	! when the branch back to .copyword is taken.
.copyword:
	lduw	[%o2 + %o3], %o1	! src word
	add	%o2, 4, %o2		! src += 4, dst += 4
	andn	%o5, %o1, %g1		! ~word & 0x80808080
	sub	%o1, %o4, %o1		! word - 0x01010101
	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
	add	%o1, %o4, %o1		! restore word
	bz,a	.copyword		! no zero byte if magic expression == 0
	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)

	! %o1 holds a word with a zero byte and %o2 is already 4 past
	! the place it belongs.  Store its bytes one at a time and stop
	! once the zero byte has been stored.  Every path from here ends
	! at .done, so %o3, %o4 and %o5 are reused as scratch.
.zerobyte:
	set	0xff000000, %o4		! mask for 1st byte
	srl	%o1, 24, %o3		! %o3<7:0> = first byte
	andcc	%o1, %o4, %g0		! first byte zero?
	bz	.done			! yup, done
	stb	%o3, [%o2 - 4]		! (delay) store first byte
	set	0x00ff0000, %o5		! mask for 2nd byte
	srl	%o1, 16, %o3		! %o3<7:0> = second byte
	andcc	%o1, %o5, %g0		! second byte zero?
	bz	.done			! yup, done
	stb	%o3, [%o2 - 3]		! (delay) store second byte
	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	andcc	%o1, %o4, %g0		! third byte zero?
	srl	%o1, 8, %o3		! %o3<7:0> = third byte
	bz	.done			! yup, done
	stb	%o3, [%o2 - 2]		! (delay) store third byte
	stb	%o1, [%o2 - 1]		! store fourth byte

	! %o0 still holds the original dst.  The delay slot of retl is
	! filled by the cmp at .dstnotaligned below, which only changes
	! the condition codes; .empty tells the assembler that this is
	! intended.
.done:
	retl				! done with leaf function
	.empty

	! dst & 3 (in %o1) is 1, 2 or 3.  The lduw sits in the delay
	! slot of an annulling branch, so it is executed only when the
	! branch to .storehalfword2 is taken.
.dstnotaligned:
	cmp	%o1, 2			! dst half word aligned?
	be,a	.storehalfword2		! yup, store half word at a time
	lduw	[%o2 + %o3], %o1	! src word

	! dst is odd: store each word as byte, half-word, byte.
.storebyte:
	lduw	[%o2 + %o3], %o1	! src word
	add	%o2, 4, %o2		! src += 4, dst += 4
	sub	%o1, %o4, %g1		! x - 0x01010101
	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
	bnz	.zerobyte		! word has zero byte, handle end cases
	srl	%o1, 24, %g1		! (delay) %g1<7:0> = first byte
	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
	sth	%g1, [%o2 - 3]		! store bytes 2, 3
	ba	.storebyte		! next word
	stb	%o1, [%o2 - 1]		! (delay) store fourth byte

	! dst is half-word aligned: store each word as two half-words.
	! The first pass enters at .storehalfword2 with the word already
	! loaded by the delay slot at .dstnotaligned.
.storehalfword:
	lduw	[%o2 + %o3], %o1	! src word
.storehalfword2:
	add	%o2, 4, %o2		! src += 4, dst += 4
	sub	%o1, %o4, %g1		! x - 0x01010101
	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
	bnz	.zerobyte		! word has zero byte, handle end cases
	srl	%o1, 16, %g1		! (delay) get first and second byte
	sth	%g1, [%o2 - 4]		! store first and second byte
	ba	.storehalfword		! next word
	sth	%o1, [%o2 - 2]		! (delay) store third and fourth byte

	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%

	nop				! padding
	nop				! padding

	SET_SIZE(strcpy)
1690Sstevel@tonic-gate
170