xref: /onnv-gate/usr/src/lib/libc/capabilities/sun4u-us3/common/memcmp.s (revision 12719:bd9fb35d09c2)
1*12719SRod.Evans@Sun.COM/*
2*12719SRod.Evans@Sun.COM * CDDL HEADER START
3*12719SRod.Evans@Sun.COM *
4*12719SRod.Evans@Sun.COM * The contents of this file are subject to the terms of the
5*12719SRod.Evans@Sun.COM * Common Development and Distribution License (the "License").
6*12719SRod.Evans@Sun.COM * You may not use this file except in compliance with the License.
7*12719SRod.Evans@Sun.COM *
8*12719SRod.Evans@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*12719SRod.Evans@Sun.COM * or http://www.opensolaris.org/os/licensing.
10*12719SRod.Evans@Sun.COM * See the License for the specific language governing permissions
11*12719SRod.Evans@Sun.COM * and limitations under the License.
12*12719SRod.Evans@Sun.COM *
13*12719SRod.Evans@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each
14*12719SRod.Evans@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*12719SRod.Evans@Sun.COM * If applicable, add the following below this CDDL HEADER, with the
16*12719SRod.Evans@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying
17*12719SRod.Evans@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner]
18*12719SRod.Evans@Sun.COM *
19*12719SRod.Evans@Sun.COM * CDDL HEADER END
20*12719SRod.Evans@Sun.COM */
21*12719SRod.Evans@Sun.COM
22*12719SRod.Evans@Sun.COM/*
23*12719SRod.Evans@Sun.COM * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
24*12719SRod.Evans@Sun.COM */
25*12719SRod.Evans@Sun.COM
26*12719SRod.Evans@Sun.COM	.file	"memcmp.s"
27*12719SRod.Evans@Sun.COM
28*12719SRod.Evans@Sun.COM/*
29*12719SRod.Evans@Sun.COM * memcmp(s1, s2, n)
30*12719SRod.Evans@Sun.COM *
31*12719SRod.Evans@Sun.COM * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
32*12719SRod.Evans@Sun.COM *
33*12719SRod.Evans@Sun.COM * Fast assembler language version of the following C-program for memcmp
34*12719SRod.Evans@Sun.COM * which represents the `standard' for the C-library.
35*12719SRod.Evans@Sun.COM *
36*12719SRod.Evans@Sun.COM *	int
37*12719SRod.Evans@Sun.COM *	memcmp(const void *s1, const void *s2, size_t n)
38*12719SRod.Evans@Sun.COM *	{
39*12719SRod.Evans@Sun.COM *		if (s1 != s2 && n != 0) {
40*12719SRod.Evans@Sun.COM *			const unsigned char *ps1 = s1;
41*12719SRod.Evans@Sun.COM *			const unsigned char *ps2 = s2;
42*12719SRod.Evans@Sun.COM *			do {
43*12719SRod.Evans@Sun.COM *				if (*ps1++ != *ps2++)
44*12719SRod.Evans@Sun.COM *					return(ps1[-1] - ps2[-1]);
45*12719SRod.Evans@Sun.COM *			} while (--n != 0);
46*12719SRod.Evans@Sun.COM *		}
47*12719SRod.Evans@Sun.COM *		return (0);
48*12719SRod.Evans@Sun.COM *	}
49*12719SRod.Evans@Sun.COM */
50*12719SRod.Evans@Sun.COM
51*12719SRod.Evans@Sun.COM#include <sys/asm_linkage.h>
52*12719SRod.Evans@Sun.COM#include <sys/machasi.h>
53*12719SRod.Evans@Sun.COM
/*
 * Byte stride used to form the prefetch offsets in memcmp below
 * (1, 2 and 3 times BLOCK_SIZE past the current pointers).
 */
54*12719SRod.Evans@Sun.COM#define	BLOCK_SIZE	64
55*12719SRod.Evans@Sun.COM
	! NOTE(review): ANSI_PRAGMA_WEAK, ENTRY, SET_SIZE and %ncc are not
	! defined in this file; presumably they come from the included
	! headers - confirm there.
56*12719SRod.Evans@Sun.COM	ANSI_PRAGMA_WEAK(memcmp,function)
57*12719SRod.Evans@Sun.COM
/*
 * memcmp - compare n bytes of s1 and s2.
 *
 * In:	%o0 = s1, %o1 = s2, %o2 = n
 * Out:	%o0 = 0 when the ranges are equal (or s1 == s2, or n == 0),
 *	otherwise (byte of s1) - (byte of s2) at the first mismatch,
 *	both bytes zero-extended by ldub.
 * Scratch: %o3, %o4, %o5 on every path.  The double-word path also
 *	uses %g1, %d0, %d2, %d6, %d8, writes %fprs and executes alignaddr.
 *
 * Strategy:
 *   1. s1 == s2: return 0 at once.
 *   2. n <= 48: compare all n bytes in the byte loop (1: / .bytcmp).
 *   3. n > 48: compare the first 16 - (s1 & 7) bytes (9 to 16) in the
 *	same byte loop, which leaves s1 8-byte aligned.  Then compare
 *	8 bytes per iteration (.dwcmp, loop 4:), loading s1 with ldd and
 *	building the matching s2 double word with alignaddr/faligndata.
 *	The double-word loop only detects that a mismatch exists; the
 *	second byte loop (5: / 6:) locates it and also compares the
 *	trailing 8 + (remaining & 7) bytes.
 */
58*12719SRod.Evans@Sun.COM	ENTRY(memcmp)
59*12719SRod.Evans@Sun.COM	cmp	%o0, %o1		! s1 == s2?
60*12719SRod.Evans@Sun.COM	be	%ncc, .cmpeq
	! delay slot (not annulled): issued whether or not the branch is taken
61*12719SRod.Evans@Sun.COM	prefetch [%o0], #one_read
62*12719SRod.Evans@Sun.COM	prefetch [%o1], #one_read
63*12719SRod.Evans@Sun.COM
64*12719SRod.Evans@Sun.COM	! for small counts byte compare immediately
65*12719SRod.Evans@Sun.COM	cmp	%o2, 48
66*12719SRod.Evans@Sun.COM	bleu,a 	%ncc, .bytcmp
	! annulled delay slot: executed only when the branch is taken
67*12719SRod.Evans@Sun.COM	mov	%o2, %o3		! o3 = n (n <= 48): byte compare everything
68*12719SRod.Evans@Sun.COM
69*12719SRod.Evans@Sun.COM	! Count > 48. We will byte compare (8 + num of bytes to dbl align)
70*12719SRod.Evans@Sun.COM	! bytes. We assume that most miscompares will occur in the 1st 8 bytes
71*12719SRod.Evans@Sun.COM
72*12719SRod.Evans@Sun.COM	prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
73*12719SRod.Evans@Sun.COM	prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read
74*12719SRod.Evans@Sun.COM
	! Only s1 is considered here.  When s1 is already 8-byte aligned
	! (s1 & 7 == 0) the "bytes to dbl align" value is 8, not 0, so o3
	! ends up in the range 9..16.
75*12719SRod.Evans@Sun.COM.chkdbl:
76*12719SRod.Evans@Sun.COM	and     %o0, 7, %o4             ! is s1 aligned on a 8 byte bound
77*12719SRod.Evans@Sun.COM	mov	8, %o3			! o2 > 48;  o3 = 8
78*12719SRod.Evans@Sun.COM        sub     %o4, 8, %o4		! o4 = (s1 & 7) - 8 = -(num of bytes to dbl align)
79*12719SRod.Evans@Sun.COM	ba	%ncc, .bytcmp
80*12719SRod.Evans@Sun.COM        sub     %o3, %o4, %o3           ! (delay slot) o3 = 8 + (num of bytes to dbl align)
81*12719SRod.Evans@Sun.COM
	! First byte compare loop.  It is entered at .bytcmp, not at 1:.
	!   o3 = bytes still to compare in this loop
	!   o2 = bytes still to compare overall
	!   o4 = current byte of s1 (loaded in the annulled delay slot of
	!        bgeu,a below), o5 = current byte of s2
82*12719SRod.Evans@Sun.COM1:	ldub	[%o1], %o5        	! byte compare loop
83*12719SRod.Evans@Sun.COM        inc     %o1
84*12719SRod.Evans@Sun.COM        inc     %o0
85*12719SRod.Evans@Sun.COM	dec	%o2
86*12719SRod.Evans@Sun.COM        cmp     %o4, %o5
87*12719SRod.Evans@Sun.COM	bne	%ncc, .noteq
	! The deccc below is both the delay slot of the bne above and the
	! loop entry point.  It sets carry when o3 was already 0, which
	! makes the bgeu fall through and ends the loop.
88*12719SRod.Evans@Sun.COM.bytcmp:
89*12719SRod.Evans@Sun.COM	deccc   %o3
90*12719SRod.Evans@Sun.COM	bgeu,a	%ncc, 1b
91*12719SRod.Evans@Sun.COM        ldub    [%o0], %o4
92*12719SRod.Evans@Sun.COM
93*12719SRod.Evans@Sun.COM	! Check to see if there are more bytes to compare
	! (o2 is 0 here on the n <= 48 path and non-zero on the n > 48 path)
94*12719SRod.Evans@Sun.COM	cmp	%o2, 0			! is o2 > 0
95*12719SRod.Evans@Sun.COM	bgu	%ncc, .dwcmp		! we should already be dbl aligned
96*12719SRod.Evans@Sun.COM	nop
97*12719SRod.Evans@Sun.COM.cmpeq:
98*12719SRod.Evans@Sun.COM        retl                             ! strings compare equal
99*12719SRod.Evans@Sun.COM	sub	%g0, %g0, %o0		! (delay slot) return 0
100*12719SRod.Evans@Sun.COM
	! Mismatch found in the first byte loop; %fprs has not been touched
	! on this path, so nothing needs restoring.
101*12719SRod.Evans@Sun.COM.noteq:
102*12719SRod.Evans@Sun.COM	retl				! strings aren't equal
103*12719SRod.Evans@Sun.COM	sub	%o4, %o5, %o0		! return(*s1 - *s2)
104*12719SRod.Evans@Sun.COM
105*12719SRod.Evans@Sun.COM
106*12719SRod.Evans@Sun.COM        ! double word compare - using ldd and faligndata. Compares up to
107*12719SRod.Evans@Sun.COM        ! 8 byte multiple count and does byte compare for the residual.
108*12719SRod.Evans@Sun.COM
109*12719SRod.Evans@Sun.COM.dwcmp:
110*12719SRod.Evans@Sun.COM	prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
111*12719SRod.Evans@Sun.COM	prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read
112*12719SRod.Evans@Sun.COM
113*12719SRod.Evans@Sun.COM        ! if fprs.fef == 0, set it. Checking it requires 2 instructions.
114*12719SRod.Evans@Sun.COM        ! So set it anyway, without checking.
	! The old value is kept in o3 until the exit paths restore it.
115*12719SRod.Evans@Sun.COM        rd      %fprs, %o3              ! o3 = fprs
116*12719SRod.Evans@Sun.COM        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
117*12719SRod.Evans@Sun.COM
	! o4 counts the bytes handled by the double-word loop: the 8-byte
	! multiple of the remaining count, less one double word.  Stopping
	! one double word early keeps the look-ahead ldd [%g1] inside the
	! s2 range, since g1 <= o1.
118*12719SRod.Evans@Sun.COM        andn    %o2, 7, %o4             ! o4 has 8 byte aligned cnt
119*12719SRod.Evans@Sun.COM	sub     %o4, 8, %o4
	! alignaddr: g1 = o1 rounded down to an 8-byte boundary; the
	! discarded low bits become the byte offset used by faligndata.
120*12719SRod.Evans@Sun.COM        alignaddr %o1, %g0, %g1
121*12719SRod.Evans@Sun.COM        ldd     [%g1], %d0		! d0 = first aligned dbl word covering s2
	! Double-word loop.
	!   d0 = previous aligned s2 double word, d2 = next one
	!   d6 = 8 bytes of s1 (o0 is 8-byte aligned here)
	!   d8 = the 8 bytes of s2 that line up with d6
122*12719SRod.Evans@Sun.COM4:
123*12719SRod.Evans@Sun.COM        add     %g1, 8, %g1
124*12719SRod.Evans@Sun.COM        ldd     [%g1], %d2
125*12719SRod.Evans@Sun.COM	ldd	[%o0], %d6
126*12719SRod.Evans@Sun.COM	prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
127*12719SRod.Evans@Sun.COM	prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
128*12719SRod.Evans@Sun.COM        faligndata %d0, %d2, %d8
129*12719SRod.Evans@Sun.COM	fcmpne32 %d6, %d8, %o5		! o5 != 0 if either 32-bit half differs
130*12719SRod.Evans@Sun.COM	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
131*12719SRod.Evans@Sun.COM	fsrc1	%d8, %d8		! be used for 3 cycles else we
132*12719SRod.Evans@Sun.COM	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	! On a mismatch, branch to the residual byte loop with o0, o1 and o2
	! still describing this double word, so that loop finds the exact
	! differing byte.  The delay slot is annulled when not taken.
133*12719SRod.Evans@Sun.COM	brnz,a,pn %o5, 6f
134*12719SRod.Evans@Sun.COM	sub     %o1, %o0, %o1           ! o1 gets the difference
135*12719SRod.Evans@Sun.COM        subcc   %o4, 8, %o4
136*12719SRod.Evans@Sun.COM        add     %o0, 8, %o0
137*12719SRod.Evans@Sun.COM        add     %o1, 8, %o1
138*12719SRod.Evans@Sun.COM        bgu,pt	%ncc, 4b
139*12719SRod.Evans@Sun.COM        sub     %o2, 8, %o2		! (delay slot, always executed) 8 fewer bytes left
140*12719SRod.Evans@Sun.COM
	! Double words exhausted without a mismatch; o2 now holds the
	! residual byte count.
141*12719SRod.Evans@Sun.COM.residcmp:
142*12719SRod.Evans@Sun.COM        ba      6f
143*12719SRod.Evans@Sun.COM	sub     %o1, %o0, %o1           ! o1 gets the difference
144*12719SRod.Evans@Sun.COM
	! Second byte compare loop.  It is entered at 6:, not at 5:.
	!   o1 = s2 - s1, so [%o0 + %o1] addresses the s2 byte matching [%o0]
	!   o2 = bytes still to compare
	!   o4 = current byte of s1 (loaded in the annulled delay slot of
	!        bgeu,a below), o5 = current byte of s2
	! The ldub at 5: runs before inc %o0, so both loads use the same o0.
145*12719SRod.Evans@Sun.COM5:      ldub    [%o0 + %o1], %o5        ! byte compare loop
146*12719SRod.Evans@Sun.COM        inc     %o0
147*12719SRod.Evans@Sun.COM        cmp     %o4, %o5
148*12719SRod.Evans@Sun.COM        bne     %ncc, .dnoteq
	! deccc is both the delay slot of the bne above and the loop entry.
149*12719SRod.Evans@Sun.COM6:
150*12719SRod.Evans@Sun.COM        deccc   %o2
151*12719SRod.Evans@Sun.COM        bgeu,a	%ncc, 5b
152*12719SRod.Evans@Sun.COM        ldub    [%o0], %o4
153*12719SRod.Evans@Sun.COM
	! All bytes matched.  Only the fef bit (0x4) of the saved fprs
	! value is written back.
154*12719SRod.Evans@Sun.COM	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
155*12719SRod.Evans@Sun.COM	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
156*12719SRod.Evans@Sun.COM	retl
157*12719SRod.Evans@Sun.COM	sub	%g0, %g0, %o0		! strings compare equal
158*12719SRod.Evans@Sun.COM
	! Mismatch found in the second byte loop: same fprs restore as
	! above, then return the byte difference.
159*12719SRod.Evans@Sun.COM.dnoteq:
160*12719SRod.Evans@Sun.COM	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
161*12719SRod.Evans@Sun.COM	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
162*12719SRod.Evans@Sun.COM	retl
163*12719SRod.Evans@Sun.COM	sub	%o4, %o5, %o0		! return(*s1 - *s2)
164*12719SRod.Evans@Sun.COM
165*12719SRod.Evans@Sun.COM	SET_SIZE(memcmp)
166