/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * bcopy / memmove / memcpy for SuperH.
 *
 * The three entry points fall through into one another:
 *
 *   bcopy    swaps r4 and r5 (its first two arguments are in the
 *            opposite order), then falls into memmove.
 *   memmove  copies backwards (bcopy_overlap) when src <= dst (unsigned),
 *            otherwise falls into memcpy.
 *   memcpy   copies forwards; returns at once when src == dst.
 *
 * Register roles after the optional bcopy swap:
 *   REG_DST  (r4)  destination cursor
 *   REG_SRC  (r5)  source cursor
 *   REG_LEN  (r6)  bytes remaining
 *   REG_DST0 (r3)  copy of the original destination; moved to r0 in the
 *                  delay slot of every rts, so it is the value left in r0
 *   r0, r1         scratch
 *
 * The copy width is chosen from (src ^ dst) & 3:
 *   0             longword copy once the pointers have been aligned
 *   2             word (16-bit) copy once the pointers have been aligned
 *   odd           byte copy only
 * Leftover bytes are always finished by the byte loop.
 *
 * Delay slots: bt/s, bf/s, bra and rts execute the following instruction
 * whether or not the branch is taken.  Those instructions are indented by
 * one extra space below.  Several delay slots hold the first instruction
 * after a label (memcpy, longword_align, ov_longword_align), so the
 * instruction order here must not be changed.
 */

#define	REG_DST0		r3
#define	REG_SRC			r5
#define	REG_DST			r4
#define	REG_LEN			r6

ENTRY(bcopy)
	/* swap registers, use DST0 as a temporary */
	mov	REG_DST,REG_DST0
	mov	REG_SRC,REG_DST
	mov	REG_DST0,REG_SRC

ENTRY(memmove)
	mov	REG_DST,REG_DST0
	cmp/hi	REG_DST,REG_SRC		/* T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap		/* if ( src <= dst ) copy backwards */

ENTRY(memcpy)
	 mov	REG_DST,REG_DST0	/* (also the delay slot of the bf/s above) */
	cmp/eq	REG_DST,REG_SRC		/* if ( src == dst ) return; */
	bt/s	bcopy_return
	 mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused at word_align */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	word_align

longword_align:
	 tst	REG_LEN,REG_LEN		/* if ( len==0 ) return; */
	bt/s	bcopy_return


	 mov	REG_SRC,r0
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	 mov	REG_SRC,r0
	tst	#2,r0			/*      (src & 2) { */
	bt	1f
	mov.w	@REG_SRC+,r0		/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*        len -= 2; */
	mov.w	r0,@REG_DST
	add	#2,REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	no_align_delay
	 tst	REG_LEN,REG_LEN
2:
	mov.l	@REG_SRC+,r0		/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++; */
	add	#-4,REG_LEN		/*   len -= 4; */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	 add	#4,REG_DST		/* } */

	bra	no_align_delay
	 tst	REG_LEN,REG_LEN


word_align:
	mov	r1,r0
	tst	#1,r0			/* odd (src ^ dst): only byte copies are possible */
	bf/s	no_align_delay
	 tst	REG_LEN,REG_LEN		/* if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	no_align_delay
	 tst	REG_LEN,REG_LEN
2:
	mov.w	@REG_SRC+,r0		/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*   len -= 2; */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	 add	#2,REG_DST		/* } */


no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
no_align_delay:				/* entered with T already set to ( len == 0 ) */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN		/*    len--; */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	 add	#1,REG_DST		/* } */
bcopy_return:
	rts
	 mov	REG_DST0,r0		/* r0 = original dst */

bcopy_overlap:
	/* backward copy: start both cursors one past the end of the buffers */
	add	REG_LEN,REG_SRC
	add	REG_LEN,REG_DST

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused at ov_word_align */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	ov_word_align

ov_longword_align:
	 tst	REG_LEN,REG_LEN		/* if ( len==0 ) return; */
	bt/s	bcopy_return


	 mov	REG_SRC,r0
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	add	#-1,REG_SRC		/*    *--dst = *--src; */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	 mov	REG_SRC,r0
	tst	#2,r0			/*      (src & 2) { */
	bt	1f
	add	#-2,REG_SRC		/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/*        len -= 2; */
	mov.w	r0,@-REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	ov_no_align_delay
	 tst	REG_LEN,REG_LEN
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0		/*   *--((unsigned long*)dst) = *--((unsigned long*)src); */
	add	#-4,REG_LEN		/*   len -= 4; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	 mov.l	r0,@-REG_DST		/* } */

	bra	ov_no_align_delay
	 tst	REG_LEN,REG_LEN


ov_word_align:
	mov	r1,r0
	tst	#1,r0			/* odd (src ^ dst): only byte copies are possible */
	bf/s	ov_no_align_delay
	 tst	REG_LEN,REG_LEN		/* if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*    *--dst = *--src; */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	ov_no_align_delay
	 tst	REG_LEN,REG_LEN
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0		/*   *--((unsigned short*)dst) = *--((unsigned short*)src); */
	add	#-2,REG_LEN		/*   len -= 2; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	 mov.w	r0,@-REG_DST		/* } */


ov_no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
ov_no_align_delay:			/* entered with T already set to ( len == 0 ) */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*    *--dst = *--src; */
	add	#-1,REG_LEN		/*    len--; */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	 mov.b	r0,@-REG_DST		/* } */
9:
	rts
	 mov	REG_DST0,r0		/* r0 = original dst */