xref: /openbsd-src/sys/lib/libkern/arch/sh/memmove.S (revision af28a4e1845fd11be1ff2e08c44ebaae8fd62b6a)
15766bcd8Stedu/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/
25766bcd8Stedu
35766bcd8Stedu/*
45766bcd8Stedu * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
55766bcd8Stedu * All rights reserved.
65766bcd8Stedu *
75766bcd8Stedu * Redistribution and use in source and binary forms, with or without
85766bcd8Stedu * modification, are permitted provided that the following conditions
95766bcd8Stedu * are met:
105766bcd8Stedu * 1. Redistributions of source code must retain the above copyright
115766bcd8Stedu *    notice, this list of conditions and the following disclaimer.
125766bcd8Stedu * 2. Redistributions in binary form must reproduce the above copyright
135766bcd8Stedu *    notice, this list of conditions and the following disclaimer in the
145766bcd8Stedu *    documentation and/or other materials provided with the distribution.
155766bcd8Stedu * 3. The name of the author may not be used to endorse or promote products
165766bcd8Stedu *    derived from this software without specific prior written permission.
175766bcd8Stedu *
185766bcd8Stedu * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
195766bcd8Stedu * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
205766bcd8Stedu * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
215766bcd8Stedu * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
225766bcd8Stedu * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
235766bcd8Stedu * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
245766bcd8Stedu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
255766bcd8Stedu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
265766bcd8Stedu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
275766bcd8Stedu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
285766bcd8Stedu */
295766bcd8Stedu
305766bcd8Stedu#include <machine/asm.h>
315766bcd8Stedu
/*
 * Register roles used by every routine in this file.  memmove and memcpy
 * use REG_DST / REG_SRC / REG_LEN exactly as they arrive on entry; bcopy
 * exchanges r4 and r5 first.  r0 and r1 are used as scratch by the copy
 * code, and r0 carries the return value.
 *
 *   REG_DST0  copy of the original destination pointer, moved to r0 on return
 *   REG_SRC   running source pointer
 *   REG_DST   running destination pointer
 *   REG_LEN   number of bytes still to copy
 */
325766bcd8Stedu#define	REG_DST0	r3
335766bcd8Stedu#define	REG_SRC		r5
345766bcd8Stedu#define	REG_DST		r4
355766bcd8Stedu#define	REG_LEN		r6
365766bcd8Stedu
/*
 * bcopy: exchange r4 and r5 so the shared code sees the pointers in
 * REG_DST / REG_SRC, then continue into memmove.  There is no return
 * instruction here: execution falls through to the next entry point.
 */
375766bcd8SteduENTRY(bcopy)
385766bcd8Stedu	/* swap registers, use DST0 as a temporary */
395766bcd8Stedu	mov	REG_DST,REG_DST0	/* tmp = r4 */
405766bcd8Stedu	mov	REG_SRC,REG_DST	/* r4  = r5 */
415766bcd8Stedu	mov	REG_DST0,REG_SRC	/* r5  = tmp */
425766bcd8Stedu
/*
 * memmove: choose the copy direction.
 *   src >  dst : fall through into memcpy and copy forward.
 *   src <= dst : branch to bcopy_overlap and copy backward from the end,
 *                so bytes are read before an overlapping store can
 *                overwrite them.
 *
 * NOTE(review): bf/s is a delayed branch, so the instruction the assembler
 * places after it runs even when the branch is taken.  That is expected to
 * be the first instruction of memcpy (mov REG_DST,REG_DST0); the ENTRY()
 * expansion is not visible here, so if it emits alignment padding the slot
 * holds the pad instruction instead -- confirm.  Either way REG_DST0 has
 * already been set below before the branch.
 */
435766bcd8SteduENTRY(memmove)
445766bcd8Stedu	mov	REG_DST,REG_DST0	/* remember original dst for the return value */
455766bcd8Stedu	cmp/hi	REG_DST,REG_SRC	/* T = ( src > dst ), unsigned */
465766bcd8Stedu	bf/s	bcopy_overlap	/* if ( src <= dst ) copy backward */
475766bcd8Stedu
/*
 * memcpy: copy REG_LEN bytes forward from REG_SRC to REG_DST and return the
 * original dst in r0.  memmove falls into this code when src > dst.
 *
 * The widest usable access is chosen from r1 = (src ^ dst) & 3:
 *   r1 == 0      longword_align: src and dst can be 4-byte aligned together
 *   r1 == 2      word_align:     only 2-byte alignment can be shared
 *   r1 odd       no_align:       byte-at-a-time copy
 *
 * bt/s, bf/s and bra are delayed branches: the instruction after each one
 * executes whether or not the branch is taken.  Several delay slots below
 * sit on the far side of a label or blank line, so the instruction order
 * must not be changed.
 */
485766bcd8SteduENTRY(memcpy)
49*af28a4e1Smiod	mov	REG_DST,REG_DST0	/* remember original dst for the return value */
505766bcd8Stedu	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
515766bcd8Stedu	bt/s	bcopy_return
525766bcd8Stedu	mov	REG_SRC,r0	/* (delay slot) r0 = src; overwritten again on return */
535766bcd8Stedu	xor	REG_DST,r0
545766bcd8Stedu	and	#3,r0
555766bcd8Stedu	mov	r0,r1		/* r1 = (src ^ dst) & 3, read later by word_align */
565766bcd8Stedu	tst	r0,r0		/* (src ^ dst) & 3         */
575766bcd8Stedu	bf/s	word_align	/* nonzero: cannot use longword copies */
585766bcd8Stedu
	/* The tst below is also the delay slot of the bf/s above. */
595766bcd8Stedulongword_align:
605766bcd8Stedu	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
615766bcd8Stedu	bt/s	bcopy_return
625766bcd8Stedu
635766bcd8Stedu
	/* The mov below is the delay slot of the bt/s above. */
645766bcd8Stedu	mov	REG_SRC,r0
655766bcd8Stedu	tst	#1,r0		/* if ( src & 1 )          */
665766bcd8Stedu	bt	1f
675766bcd8Stedu	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
685766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
695766bcd8Stedu	mov.b	r0,@REG_DST
705766bcd8Stedu	add	#1,REG_DST
715766bcd8Stedu1:
725766bcd8Stedu
735766bcd8Stedu	mov	#1,r0
745766bcd8Stedu	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
755766bcd8Stedu	bf/s	1f
765766bcd8Stedu	mov	REG_SRC,r0	/* (delay slot) r0 = src for the bit test */
775766bcd8Stedu	tst	#2,r0		/*      (src & 2) {        */
785766bcd8Stedu	bt	1f
795766bcd8Stedu	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
805766bcd8Stedu	add	#-2,REG_LEN	/*        len -= 2;                                              */
815766bcd8Stedu	mov.w	r0,@REG_DST
825766bcd8Stedu	add	#2,REG_DST	/* }                       */
835766bcd8Stedu1:
845766bcd8Stedu
855766bcd8Stedu
865766bcd8Stedu	mov	#3,r1		/* r1 = loop threshold */
875766bcd8Stedu	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
885766bcd8Stedu	bf/s	no_align_delay
895766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
905766bcd8Stedu2:
915766bcd8Stedu	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
925766bcd8Stedu	add	#-4,REG_LEN	/*   len -= 4;                                                   */
935766bcd8Stedu	mov.l	r0,@REG_DST
945766bcd8Stedu	cmp/hi	r1,REG_LEN
955766bcd8Stedu	bt/s	2b
965766bcd8Stedu	add	#4,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
975766bcd8Stedu
985766bcd8Stedu	bra	no_align_delay	/* copy the 0-3 trailing bytes */
995766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
1005766bcd8Stedu
1015766bcd8Stedu
/*
 * word_align: reached when (src ^ dst) & 3 is nonzero; r1 still holds that
 * value from the memcpy entry code.  If bit 0 is set the pointers can never
 * share 2-byte alignment, so go straight to the byte loop.  Otherwise copy
 * one byte if src is odd, then copy 16 bits at a time while len > 1, and
 * fall through into no_align for a possible last byte.
 */
1025766bcd8Steduword_align:
1035766bcd8Stedu	mov	r1,r0
1045766bcd8Stedu	tst	#1,r0		/* T = ( ((src ^ dst) & 1) == 0 ) */
1055766bcd8Stedu	bf/s	no_align_delay	/* odd difference: byte copy only */
1065766bcd8Stedu	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (also the delay slot above) */
1075766bcd8Stedu	bt	bcopy_return
1085766bcd8Stedu
1095766bcd8Stedu
1105766bcd8Stedu	mov	REG_SRC,r0	/* if ( src & 1 )          */
1115766bcd8Stedu	tst	#1,r0
1125766bcd8Stedu	bt	1f
1135766bcd8Stedu	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
1145766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
1155766bcd8Stedu	mov.b	r0,@REG_DST
1165766bcd8Stedu	add	#1,REG_DST
1175766bcd8Stedu1:
1185766bcd8Stedu
1195766bcd8Stedu
1205766bcd8Stedu	mov	#1,r1		/* r1 = loop threshold */
1215766bcd8Stedu	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
1225766bcd8Stedu	bf/s	no_align_delay
1235766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
1245766bcd8Stedu2:
1255766bcd8Stedu	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
1265766bcd8Stedu	add	#-2,REG_LEN	/*   len -= 2;                                                   */
1275766bcd8Stedu	mov.w	r0,@REG_DST
1285766bcd8Stedu	cmp/hi	r1,REG_LEN
1295766bcd8Stedu	bt/s	2b
1305766bcd8Stedu	add	#2,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
1315766bcd8Stedu
1325766bcd8Stedu
/*
 * no_align: forward byte-at-a-time copy of whatever is left, followed by
 * the common return.
 *
 * no_align_delay is a second entry point for callers that have already
 * executed "tst REG_LEN,REG_LEN" in a branch delay slot; it expects
 * T = ( len == 0 ) on arrival.
 */
1335766bcd8Steduno_align:
1345766bcd8Stedu	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
1355766bcd8Steduno_align_delay:
1365766bcd8Stedu	bt	bcopy_return
1375766bcd8Stedu1:
1385766bcd8Stedu	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
1395766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
1405766bcd8Stedu	mov.b	r0,@REG_DST
1415766bcd8Stedu	tst	REG_LEN,REG_LEN
1425766bcd8Stedu	bf/s	1b
1435766bcd8Stedu	add	#1,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
	/* Common exit: the mov after rts is its delay slot, so r0 = original dst. */
1445766bcd8Stedubcopy_return:
1455766bcd8Stedu	rts
1465766bcd8Stedu	mov	REG_DST0,r0
1475766bcd8Stedu
/*
 * bcopy_overlap: backward copy, entered from memmove when src <= dst.
 * Both pointers are first moved to one past the end of their regions and
 * then walked downward.  Stores use the pre-decrement form @-REG_DST;
 * loads decrement REG_SRC with an explicit add before reading.
 *
 * The access width is chosen from r1 = (src ^ dst) & 3 exactly as in the
 * forward copy, with alignment tested on the end pointers:
 *   r1 == 0      ov_longword_align
 *   r1 == 2      ov_word_align
 *   r1 odd       ov_no_align
 *
 * As elsewhere in this file, the instruction after bt/s, bf/s or bra is a
 * delay slot and executes regardless of the branch outcome.
 */
1485766bcd8Stedubcopy_overlap:
1495766bcd8Stedu	add	REG_LEN,REG_SRC	/* src += len */
1505766bcd8Stedu	add	REG_LEN,REG_DST	/* dst += len */
1515766bcd8Stedu
1525766bcd8Stedu	mov	REG_SRC,r0
1535766bcd8Stedu	xor	REG_DST,r0
1545766bcd8Stedu	and	#3,r0
1555766bcd8Stedu	mov	r0,r1		/* r1 = (src ^ dst) & 3, read later by ov_word_align */
1565766bcd8Stedu	tst	r0,r0		/* (src ^ dst) & 3         */
1575766bcd8Stedu	bf/s	ov_word_align	/* nonzero: cannot use longword copies */
1585766bcd8Stedu
	/* The tst below is also the delay slot of the bf/s above. */
1595766bcd8Steduov_longword_align:
1605766bcd8Stedu	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
1615766bcd8Stedu	bt/s	bcopy_return
1625766bcd8Stedu
1635766bcd8Stedu
	/* The mov below is the delay slot of the bt/s above. */
1645766bcd8Stedu	mov	REG_SRC,r0
1655766bcd8Stedu	tst	#1,r0		/* if ( src & 1 )          */
1665766bcd8Stedu	bt	1f
1675766bcd8Stedu	add	#-1,REG_SRC	/*    *--dst = *--src;     */
1685766bcd8Stedu	mov.b	@REG_SRC,r0
1695766bcd8Stedu	mov.b	r0,@-REG_DST
1705766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
1715766bcd8Stedu1:
1725766bcd8Stedu
1735766bcd8Stedu
1745766bcd8Stedu	mov	#1,r0
1755766bcd8Stedu	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
1765766bcd8Stedu	bf/s	1f
1775766bcd8Stedu	mov	REG_SRC,r0	/* (delay slot) r0 = src for the bit test */
1785766bcd8Stedu	tst	#2,r0		/*      (src & 2) {        */
1795766bcd8Stedu	bt	1f
1805766bcd8Stedu	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
1815766bcd8Stedu	mov.w	@REG_SRC,r0
1825766bcd8Stedu	add	#-2,REG_LEN	/*        len -= 2;                                              */
1835766bcd8Stedu	mov.w	r0,@-REG_DST	/* }                       */
1845766bcd8Stedu1:
1855766bcd8Stedu
1865766bcd8Stedu
1875766bcd8Stedu	mov	#3,r1		/* r1 = loop threshold */
1885766bcd8Stedu	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
1895766bcd8Stedu	bf/s	ov_no_align_delay
1905766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
1915766bcd8Stedu2:
1925766bcd8Stedu	add	#-4,REG_SRC
1935766bcd8Stedu	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
1945766bcd8Stedu	add	#-4,REG_LEN	/*   len -= 4;                                                   */
1955766bcd8Stedu	cmp/hi	r1,REG_LEN
1965766bcd8Stedu	bt/s	2b
1975766bcd8Stedu	mov.l	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass) */
1985766bcd8Stedu
1995766bcd8Stedu	bra	ov_no_align_delay	/* copy the 0-3 leading bytes */
2005766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
2015766bcd8Stedu
2025766bcd8Stedu
/*
 * ov_word_align: backward counterpart of word_align.  r1 still holds
 * (src ^ dst) & 3 from bcopy_overlap.  If bit 0 is set, go to the byte
 * loop.  Otherwise copy one byte if the (end) src pointer is odd, then
 * copy 16 bits at a time while len > 1, and fall through into ov_no_align
 * for a possible last byte.
 */
2035766bcd8Steduov_word_align:
2045766bcd8Stedu	mov	r1,r0
2055766bcd8Stedu	tst	#1,r0		/* T = ( ((src ^ dst) & 1) == 0 ) */
2065766bcd8Stedu	bf/s	ov_no_align_delay	/* odd difference: byte copy only */
2075766bcd8Stedu	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (also the delay slot above) */
2085766bcd8Stedu	bt	bcopy_return
2095766bcd8Stedu
2105766bcd8Stedu
2115766bcd8Stedu	mov	REG_SRC,r0	/* if ( src & 1 )          */
2125766bcd8Stedu	tst	#1,r0
2135766bcd8Stedu	bt	1f
2145766bcd8Stedu	add	#-1,REG_SRC
2155766bcd8Stedu	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
2165766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
2175766bcd8Stedu	mov.b	r0,@-REG_DST
2185766bcd8Stedu1:
2195766bcd8Stedu
2205766bcd8Stedu
2215766bcd8Stedu	mov	#1,r1		/* r1 = loop threshold */
2225766bcd8Stedu	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
2235766bcd8Stedu	bf/s	ov_no_align_delay
2245766bcd8Stedu	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2255766bcd8Stedu2:
2265766bcd8Stedu	add	#-2,REG_SRC
2275766bcd8Stedu	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
2285766bcd8Stedu	add	#-2,REG_LEN	/*   len -= 2;                                                   */
2295766bcd8Stedu	cmp/hi	r1,REG_LEN
2305766bcd8Stedu	bt/s	2b
2315766bcd8Stedu	mov.w	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass) */
2325766bcd8Stedu
2335766bcd8Stedu
/*
 * ov_no_align: backward byte-at-a-time copy of whatever is left, followed
 * by this path's own return sequence.
 *
 * ov_no_align_delay is a second entry point for callers that have already
 * executed "tst REG_LEN,REG_LEN" in a branch delay slot; it expects
 * T = ( len == 0 ) on arrival.
 */
2345766bcd8Steduov_no_align:
2355766bcd8Stedu	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
2365766bcd8Steduov_no_align_delay:
2375766bcd8Stedu	bt	9f
2385766bcd8Stedu1:
2395766bcd8Stedu	add	#-1,REG_SRC
2405766bcd8Stedu	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
2415766bcd8Stedu	add	#-1,REG_LEN	/*    len--;               */
2425766bcd8Stedu	tst	REG_LEN,REG_LEN
2435766bcd8Stedu	bf/s	1b
2445766bcd8Stedu	mov.b	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass) */
	/* Exit: the mov after rts is its delay slot, so r0 = original dst. */
2455766bcd8Stedu9:
2465766bcd8Stedu	rts
2475766bcd8Stedu	mov	REG_DST0,r0
248