xref: /netbsd-src/common/lib/libc/arch/ia64/string/bcopy.S (revision b83ebeba7f767758d2778bb0f9d7a76534253621)
1*b83ebebaSscole/*	$NetBSD: bcopy.S,v 1.1 2016/08/05 15:06:02 scole Exp $	*/
2*b83ebebaSscole
3*b83ebebaSscole/*-
4*b83ebebaSscole * Copyright (c) 2000 Doug Rabson
5*b83ebebaSscole * All rights reserved.
6*b83ebebaSscole *
7*b83ebebaSscole * Redistribution and use in source and binary forms, with or without
8*b83ebebaSscole * modification, are permitted provided that the following conditions
9*b83ebebaSscole * are met:
10*b83ebebaSscole * 1. Redistributions of source code must retain the above copyright
11*b83ebebaSscole *    notice, this list of conditions and the following disclaimer.
12*b83ebebaSscole * 2. Redistributions in binary form must reproduce the above copyright
13*b83ebebaSscole *    notice, this list of conditions and the following disclaimer in the
14*b83ebebaSscole *    documentation and/or other materials provided with the distribution.
15*b83ebebaSscole *
16*b83ebebaSscole * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17*b83ebebaSscole * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*b83ebebaSscole * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*b83ebebaSscole * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*b83ebebaSscole * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*b83ebebaSscole * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*b83ebebaSscole * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*b83ebebaSscole * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*b83ebebaSscole * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*b83ebebaSscole * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*b83ebebaSscole * SUCH DAMAGE.
27*b83ebebaSscole *
28*b83ebebaSscole * __FBSDID("$FreeBSD: releng/10.1/lib/libc/ia64/string/bcopy.S 125669 2004-02-10 20:45:28Z cperciva $");
29*b83ebebaSscole */
30*b83ebebaSscole
31*b83ebebaSscole#include <machine/asm.h>
32*b83ebebaSscole
33*b83ebebaSscole/*
34*b83ebebaSscole * void
35*b83ebebaSscole * bcopy(const void *src, void *dst, size_t len);
36*b83ebebaSscole */
37*b83ebebaSscole
38*b83ebebaSscole/*
39*b83ebebaSscole * Not the fastest bcopy in the world.
40*b83ebebaSscole */
41*b83ebebaSscoleENTRY(bcopy, 3)
	//
	// Copy in2 (len) bytes from in0 (src) to in1 (dst).  Overlapping
	// regions are handled: when dst lies inside [src, src+len) the copy
	// runs backwards.  Nothing is returned.
	//
	// NOTE(review): the 3 passed to ENTRY is presumably the number of
	// input registers (in0-in2); confirm against <machine/asm.h>.
	//
	// Register use:
	//   in0    src pointer, moved along as bytes are loaded
	//   in1    dst pointer, moved along as bytes are stored
	//   in2    number of bytes still to copy
	//   r14    scratch: dst-src, src & 7, byte in flight, constant 8
	//   r15    scratch: dst & 7, updated src & 7, word in flight
	//   p6,p7  branch predicates
	//
	// Local labels:
	//   1:  forward copy, one byte at a time (used when src and dst are
	//       aligned differently, and for the < 8 byte tail of loop 4)
	//   2:  src and dst share the same low three address bits
	//   3:  byte copy until src reaches an 8-byte boundary
	//   4:  forward copy, 8 bytes at a time
	//   5:  overlap: point src/dst at their last byte
	//   6:  backward copy, one byte at a time
	//
	// A `;;' is an IA-64 stop: it ends the current instruction group so
	// that the instructions after it see the registers written before it.
	//
42*b83ebebaSscole
	// NOTE(review): cmp.le is a signed compare, so a len with bit 63
	// set is treated as negative and returns without copying anything.
43*b83ebebaSscole	cmp.le	p6,p0=in2,r0			// bail if len <= 0
44*b83ebebaSscole(p6)	br.ret.spnt.few rp
45*b83ebebaSscole
	// Unsigned test: dst-src < len means dst starts inside the source
	// region, where a forward copy would overwrite bytes not yet read.
46*b83ebebaSscole	sub	r14=in1,in0 ;;			// r14 = dst - src
47*b83ebebaSscole	cmp.ltu	p6,p0=r14,in2			// dst-src < len (unsigned)?
48*b83ebebaSscole(p6)	br.cond.spnt.few 5f			// yes: copy backwards
49*b83ebebaSscole
	// Compare the low three address bits.  If they differ, src and dst
	// can never both sit on an 8-byte boundary, so fall into loop 1.
50*b83ebebaSscole	extr.u	r14=in0,0,3			// src & 7
51*b83ebebaSscole	extr.u	r15=in1,0,3 ;;			// dst & 7
52*b83ebebaSscole	cmp.eq	p6,p0=r14,r15			// same alignment?
53*b83ebebaSscole(p6)	br.cond.spnt.few 2f			// branch if same alignment
54*b83ebebaSscole
	// Loop 1: in2 must be non-zero on entry, because it is decremented
	// before it is tested.
55*b83ebebaSscole1:	ld1	r14=[in0],1 ;;			// load byte, src++
56*b83ebebaSscole	st1	[in1]=r14,1			// store byte, dst++
57*b83ebebaSscole	add	in2=-1,in2 ;;			// len--
58*b83ebebaSscole	cmp.ne	p6,p0=r0,in2			// bytes left?
59*b83ebebaSscole(p6)	br.cond.dptk.few 1b			// loop
60*b83ebebaSscole	br.ret.sptk.few rp			// done
61*b83ebebaSscole
	// r14 still holds src & 7 here (equal to dst & 7).
62*b83ebebaSscole2:	cmp.eq	p6,p0=r14,r0			// already 8-byte aligned?
63*b83ebebaSscole(p6)	br.cond.sptk.few 4f
64*b83ebebaSscole
	// Loop 3: copy single bytes until src is 8-byte aligned.  dst has
	// the same low bits, so it becomes aligned on the same iteration.
65*b83ebebaSscole3:	ld1	r14=[in0],1 ;;			// load byte, src++
66*b83ebebaSscole	st1	[in1]=r14,1			// store byte, dst++
67*b83ebebaSscole	extr.u	r15=in0,0,3			// (updated src) & 7
68*b83ebebaSscole	add	in2=-1,in2 ;;			// len--
69*b83ebebaSscole	cmp.eq	p6,p0=r0,in2			// done?
70*b83ebebaSscole	cmp.eq	p7,p0=r0,r15 ;;			// aligned now?
71*b83ebebaSscole(p6)	br.ret.spnt.few rp			// return if done
72*b83ebebaSscole(p7)	br.cond.spnt.few 4f			// go to main copy
73*b83ebebaSscole	br.cond.sptk.few 3b			// more bytes to copy
74*b83ebebaSscole
75*b83ebebaSscole	// At this point, in2 is non-zero
	// and both in0 and in1 are 8-byte aligned.
76*b83ebebaSscole
	// Loop 4: move 8 bytes per iteration while at least 8 remain; a
	// shorter, non-zero remainder is handed to byte loop 1.
	// NOTE(review): the loop-back branch below is hinted .spnt (static
	// predict not-taken) although it is taken on every iteration but
	// the last.  Hints do not change results; confirm before altering.
77*b83ebebaSscole4:	mov	r14=8 ;;
78*b83ebebaSscole	cmp.ltu	p6,p0=in2,r14 ;;		// len < 8?
79*b83ebebaSscole(p6)	br.cond.spnt.few 1b			// byte copy the end
80*b83ebebaSscole	ld8	r15=[in0],8 ;;			// load word, src += 8
81*b83ebebaSscole	st8	[in1]=r15,8			// store word, dst += 8
82*b83ebebaSscole	add	in2=-8,in2 ;;			// len -= 8
83*b83ebebaSscole	cmp.ne	p6,p0=r0,in2			// anything left?
84*b83ebebaSscole(p6)	br.cond.spnt.few 4b			// again
85*b83ebebaSscole
86*b83ebebaSscole	br.ret.sptk.few rp			// return
87*b83ebebaSscole
88*b83ebebaSscole	// Don't bother optimising overlap case
	// (it is always copied one byte at a time, highest address first).
89*b83ebebaSscole
90*b83ebebaSscole5:	add	in0=in0,in2			// src += len
91*b83ebebaSscole	add	in1=in1,in2 ;;			// dst += len
92*b83ebebaSscole	add	in0=-1,in0			// src = last source byte
93*b83ebebaSscole	add	in1=-1,in1 ;;			// dst = last destination byte
94*b83ebebaSscole
	// Loop 6: in2 is non-zero on entry (len <= 0 returned at the top).
	// NOTE(review): same .spnt hint on a loop-back branch as in loop 4.
95*b83ebebaSscole6:	ld1	r14=[in0],-1 ;;			// load byte, src--
96*b83ebebaSscole	st1	[in1]=r14,-1			// store byte, dst--
97*b83ebebaSscole	add	in2=-1,in2 ;;			// len--
98*b83ebebaSscole	cmp.ne	p6,p0=r0,in2			// bytes left?
99*b83ebebaSscole(p6)	br.cond.spnt.few 6b			// loop
100*b83ebebaSscole
101*b83ebebaSscole	br.ret.sptk.few rp			// done
102*b83ebebaSscoleEND(bcopy)
103