xref: /onnv-gate/usr/src/lib/libc/amd64/gen/strcat.s (revision 7298:b69e27387f74)
10Sstevel@tonic-gate/*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*7298SMark.J.Nelson@Sun.COM * Common Development and Distribution License (the "License").
6*7298SMark.J.Nelson@Sun.COM * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate/*
220Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
26*7298SMark.J.Nelson@Sun.COM	.file	"strcat.s"
270Sstevel@tonic-gate
280Sstevel@tonic-gate/
290Sstevel@tonic-gate/ strcat(s1, s2)
300Sstevel@tonic-gate/
310Sstevel@tonic-gate/ Concatenates s2 on the end of s1.  s1's space must be large enough.
320Sstevel@tonic-gate/ Returns s1.
330Sstevel@tonic-gate/
340Sstevel@tonic-gate/ Fast assembly language version of the following C-program strcat
350Sstevel@tonic-gate/ which represents the `standard' for the C-library.
360Sstevel@tonic-gate/
370Sstevel@tonic-gate/	char *
380Sstevel@tonic-gate/	strcat(char *s1, const char *s2)
390Sstevel@tonic-gate/	{
400Sstevel@tonic-gate/		char	*os1 = s1;
410Sstevel@tonic-gate/
420Sstevel@tonic-gate/		while (*s1++)
430Sstevel@tonic-gate/			;
440Sstevel@tonic-gate/		--s1;
450Sstevel@tonic-gate/		while (*s1++ = *s2++)
460Sstevel@tonic-gate/			;
470Sstevel@tonic-gate/		return (os1);
480Sstevel@tonic-gate/	}
490Sstevel@tonic-gate/
/ In this assembly language version, the following expression is used
/ to check whether a 32-bit word A contains a null byte:
/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
/ If the above expression generates a value other than 0x80808080,
/ the 32-bit word contains a null byte.
/
/ The above has been extended for 64-bit support.
570Sstevel@tonic-gate/
580Sstevel@tonic-gate
590Sstevel@tonic-gate#include "SYS.h"
600Sstevel@tonic-gate
/
/ char *strcat(char *s1, const char *s2)
/
/ Register usage in this routine:
/	%rdi	s1 on entry; then the cursor into the destination string
/	%rsi	s2 on entry; then the cursor into the source string
/	%rax	return value (copy of the original s1)
/	%r8	0x7f7f7f7f7f7f7f7f, mask of the low seven bits of each byte
/	%r9	0x8080808080808080, mask of the high bit of each byte
/	%rdx	quadword being examined (%dl in the byte-at-a-time loops)
/	%rcx	scratch for the null-byte test
/
/ A quadword A holds no null byte exactly when
/	(((A & %r8) + %r8) | A) & %r9  ==  %r9
/
/ Quadword loads are only issued from 8-byte aligned addresses; unaligned
/ heads and the quadword that holds the terminator are handled one byte
/ at a time.  NOTE(review): presumably this keeps a load that reads past
/ the terminator from straddling a page boundary -- confirm.
/
	ENTRY(strcat)	/* (char *s1, char *s2) */
	/ Phase 1: find the null byte that ends the destination string
	movq	%rdi, %rax		/ prepare return value (original s1)
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f...
	movq	%r8, %r9
	notq	%r9			/ %r9 = 0x80...
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1 (scan bytes until aligned)
	.align	4
.L2:
	/ %rdi is quadword aligned here
	movq	(%rdi), %rdx		/ move 1 quadword from (%rdi) to %rdx
	movq	%r8, %rcx
	andq	%rdx, %rcx		/ %rcx = %rdx & 0x7f7f7f7f7f7f7f7f
	addq	$8, %rdi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f7f7f7f7f7f7f7f
	orq	%rdx, %rcx		/ %rcx |= %rdx
	andq	%r9, %rcx		/ %rcx &= 0x8080808080808080
	cmpq	%r9, %rcx		/ if no null byte in this quadword
	je	.L2			/ goto .L2
	subq	$8, %rdi		/ undo post-increment; null is in this quadword
.L1:
	cmpb	$0, (%rdi)		/ if the byte at (%rdi) is null
	je	.L3			/ goto .L3
	incq	%rdi			/ next byte
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1
	jmp	.L2			/ goto .L2 (%rdi quadword aligned)
	.align	4
.L3:
	/ Phase 2: copy the source string, including its terminator.
	/ %rdi points to the null byte in the destination string.
	testq	$7, %rsi		/ if %rsi not quadword aligned
	jnz	.L4			/ goto .L4 (copy bytes until aligned)
	.align	4
.L5:
	/ %rsi is quadword aligned here; %rdi may have any alignment
	movq	(%rsi), %rdx		/ move 1 quadword from (%rsi) to %rdx
	movq	%r8, %rcx
	andq	%rdx, %rcx		/ %rcx = %rdx & 0x7f7f7f7f7f7f7f7f
	addq	$8, %rsi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f7f7f7f7f7f7f7f
	orq	%rdx, %rcx		/ %rcx |= %rdx
	andq	%r9, %rcx		/ %rcx &= 0x8080808080808080
	cmpq	%r9, %rcx		/ if null byte in this quadword
	jne	.L7			/ goto .L7
	movq	%rdx, (%rdi)		/ copy this quadword to (%rdi)
	addq	$8, %rdi		/ next quadword
	jmp	.L5			/ goto .L5
.L7:
	subq	$8, %rsi		/ undo post-increment; finish bytewise
	.align	4
.L4:
	movb	(%rsi), %dl		/ %dl = the byte at (%rsi)
	testb	%dl, %dl		/ ZF = (%dl is a null byte)
	movb	%dl, (%rdi)		/ copy %dl to (%rdi); movb keeps the flags
	je	.L6			/ if %dl is a null, goto .L6
	incq	%rsi			/ next byte
	incq	%rdi			/ next byte
	testq	$7, %rsi		/ if %rsi not quadword aligned
	jnz	.L4			/ goto .L4
	jmp	.L5			/ goto .L5 (%rsi quadword aligned)
	.align	4
.L6:
	ret				/ %rax still holds the original s1
	SET_SIZE(strcat)
124