/*	$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28c71562d6Suwe */ 29c71562d6Suwe 30c71562d6Suwe#include <machine/asm.h> 31c71562d6Suwe 32c71562d6Suwe#if defined(LIBC_SCCS) && !defined(lint) 33*90313c06Smsaitoh RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $") 34c71562d6Suwe#endif 35c71562d6Suwe 36c71562d6Suwe#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY) 3737c9f0a6Schristos#define MEMCOPY 38c71562d6Suwe#endif 39c71562d6Suwe 40c71562d6Suwe#if defined(MEMCOPY) || defined(MEMMOVE) 41c71562d6Suwe#define REG_DST0 r3 42c71562d6Suwe#define REG_SRC r5 43c71562d6Suwe#define REG_DST r4 44c71562d6Suwe#else 45c71562d6Suwe#define REG_SRC r4 46c71562d6Suwe#define REG_DST r5 47c71562d6Suwe#endif 48c71562d6Suwe 49c71562d6Suwe#define REG_LEN r6 50c71562d6Suwe 51c71562d6Suwe#if defined(MEMCOPY) 52c71562d6SuweENTRY(memcpy) 53c71562d6Suwe#elif defined(MEMMOVE) 54c71562d6SuweENTRY(memmove) 55c71562d6Suwe#elif defined(BCOPY) 56c71562d6SuweENTRY(bcopy) 57c71562d6Suwe#endif 58c71562d6Suwe#ifdef REG_DST0 59c71562d6Suwe mov REG_DST,REG_DST0 60c71562d6Suwe#endif 61c71562d6Suwe cmp/eq REG_DST,REG_SRC /* if ( src == dst ) return; */ 62c71562d6Suwe bt/s bcopy_return 63c71562d6Suwe cmp/hi REG_DST,REG_SRC 64c71562d6Suwe bf/s bcopy_overlap 65c71562d6Suwe 66c71562d6Suwe mov REG_SRC,r0 67c71562d6Suwe xor REG_DST,r0 68c71562d6Suwe and #3,r0 69c71562d6Suwe mov r0,r1 70c71562d6Suwe tst r0,r0 /* (src ^ dst) & 3 */ 71c71562d6Suwe bf/s word_align 72c71562d6Suwe 73c71562d6Suwelongword_align: 74c71562d6Suwe tst REG_LEN,REG_LEN /* if ( len==0 ) return; */ 75c71562d6Suwe bt/s bcopy_return 76c71562d6Suwe 77c71562d6Suwe 78c71562d6Suwe mov REG_SRC,r0 79c71562d6Suwe tst #1,r0 /* if ( src & 1 ) */ 80c71562d6Suwe bt 1f 81c71562d6Suwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */ 82c71562d6Suwe add #-1,REG_LEN 83c71562d6Suwe mov.b r0,@REG_DST 84c71562d6Suwe add #1,REG_DST 85c71562d6Suwe1: 86c71562d6Suwe 87c71562d6Suwe 88c71562d6Suwe mov #1,r0 89c71562d6Suwe cmp/hi r0,REG_LEN /* if ( (len > 1) && */ 90c71562d6Suwe bf/s 1f 91c71562d6Suwe 
mov REG_SRC,r0 92c71562d6Suwe tst #2,r0 /* (src & 2) { */ 93c71562d6Suwe bt 1f 94c71562d6Suwe mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */ 95c71562d6Suwe add #-2,REG_LEN /* len -= 2; */ 96c71562d6Suwe mov.w r0,@REG_DST 97c71562d6Suwe add #2,REG_DST /* } */ 98c71562d6Suwe1: 99c71562d6Suwe 100c71562d6Suwe 101c71562d6Suwe mov #3,r1 102c71562d6Suwe cmp/hi r1,REG_LEN /* while ( len > 3 ) { */ 103c71562d6Suwe bf/s no_align_delay 104c71562d6Suwe tst REG_LEN,REG_LEN 105c71562d6Suwe2: 106c71562d6Suwe mov.l @REG_SRC+,r0 /* *((unsigned long*)dst)++ = *((unsigned long*)src)++; */ 107c71562d6Suwe add #-4,REG_LEN /* len -= 4; */ 108c71562d6Suwe mov.l r0,@REG_DST 109c71562d6Suwe cmp/hi r1,REG_LEN 110c71562d6Suwe bt/s 2b 111c71562d6Suwe add #4,REG_DST /* } */ 112c71562d6Suwe 113c71562d6Suwe bra no_align_delay 114c71562d6Suwe tst REG_LEN,REG_LEN 115c71562d6Suwe 116c71562d6Suwe 117c71562d6Suweword_align: 118c71562d6Suwe mov r1,r0 119c71562d6Suwe tst #1,r0 120c71562d6Suwe bf/s no_align_delay 121c71562d6Suwe tst REG_LEN,REG_LEN /* if ( len == 0 ) return; */ 122c71562d6Suwe bt bcopy_return 123c71562d6Suwe 124c71562d6Suwe 125c71562d6Suwe mov REG_SRC,r0 /* if ( src & 1 ) */ 126c71562d6Suwe tst #1,r0 127c71562d6Suwe bt 1f 128c71562d6Suwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */ 129c71562d6Suwe add #-1,REG_LEN 130c71562d6Suwe mov.b r0,@REG_DST 131c71562d6Suwe add #1,REG_DST 132c71562d6Suwe1: 133c71562d6Suwe 134c71562d6Suwe 135c71562d6Suwe mov #1,r1 136c71562d6Suwe cmp/hi r1,REG_LEN /* while ( len > 1 ) { */ 137c71562d6Suwe bf/s no_align_delay 138c71562d6Suwe tst REG_LEN,REG_LEN 139c71562d6Suwe2: 140c71562d6Suwe mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */ 141c71562d6Suwe add #-2,REG_LEN /* len -= 2; */ 142c71562d6Suwe mov.w r0,@REG_DST 143c71562d6Suwe cmp/hi r1,REG_LEN 144c71562d6Suwe bt/s 2b 145c71562d6Suwe add #2,REG_DST /* } */ 146c71562d6Suwe 147c71562d6Suwe 148c71562d6Suweno_align: 149c71562d6Suwe tst 
REG_LEN,REG_LEN /* while ( len!= ) { */ 150c71562d6Suweno_align_delay: 151c71562d6Suwe bt bcopy_return 152c71562d6Suwe1: 153c71562d6Suwe mov.b @REG_SRC+,r0 /* *dst++ = *src++; */ 154c71562d6Suwe add #-1,REG_LEN /* len--; */ 155c71562d6Suwe mov.b r0,@REG_DST 156c71562d6Suwe tst REG_LEN,REG_LEN 157c71562d6Suwe bf/s 1b 158c71562d6Suwe add #1,REG_DST /* } */ 159c71562d6Suwebcopy_return: 160c71562d6Suwe rts 161c71562d6Suwe#ifdef REG_DST0 162c71562d6Suwe mov REG_DST0,r0 163c71562d6Suwe#else 164c71562d6Suwe nop 165c71562d6Suwe#endif 166c71562d6Suwe 167c71562d6Suwe 168c71562d6Suwebcopy_overlap: 169c71562d6Suwe add REG_LEN,REG_SRC 170c71562d6Suwe add REG_LEN,REG_DST 171c71562d6Suwe 172c71562d6Suwe mov REG_SRC,r0 173c71562d6Suwe xor REG_DST,r0 174c71562d6Suwe and #3,r0 175c71562d6Suwe mov r0,r1 176c71562d6Suwe tst r0,r0 /* (src ^ dst) & 3 */ 177c71562d6Suwe bf/s ov_word_align 178c71562d6Suwe 179c71562d6Suweov_longword_align: 180c71562d6Suwe tst REG_LEN,REG_LEN /* if ( len==0 ) return; */ 181c71562d6Suwe bt/s bcopy_return 182c71562d6Suwe 183c71562d6Suwe 184c71562d6Suwe mov REG_SRC,r0 185c71562d6Suwe tst #1,r0 /* if ( src & 1 ) */ 186c71562d6Suwe bt 1f 187c71562d6Suwe add #-1,REG_SRC /* *--dst = *--src; */ 188c71562d6Suwe mov.b @REG_SRC,r0 189c71562d6Suwe mov.b r0,@-REG_DST 190c71562d6Suwe add #-1,REG_LEN 191c71562d6Suwe1: 192c71562d6Suwe 193c71562d6Suwe 194c71562d6Suwe mov #1,r0 195c71562d6Suwe cmp/hi r0,REG_LEN /* if ( (len > 1) && */ 196c71562d6Suwe bf/s 1f 197c71562d6Suwe mov REG_SRC,r0 198c71562d6Suwe tst #2,r0 /* (src & 2) { */ 199c71562d6Suwe bt 1f 200c71562d6Suwe add #-2,REG_SRC /* *--((unsigned short*)dst) = *--((unsigned short*)src); */ 201c71562d6Suwe mov.w @REG_SRC,r0 202c71562d6Suwe add #-2,REG_LEN /* len -= 2; */ 203c71562d6Suwe mov.w r0,@-REG_DST /* } */ 204c71562d6Suwe1: 205c71562d6Suwe 206c71562d6Suwe 207c71562d6Suwe mov #3,r1 208c71562d6Suwe cmp/hi r1,REG_LEN /* while ( len > 3 ) { */ 209c71562d6Suwe bf/s ov_no_align_delay 210c71562d6Suwe tst REG_LEN,REG_LEN 
211c71562d6Suwe2: 212c71562d6Suwe add #-4,REG_SRC 213c71562d6Suwe mov.l @REG_SRC,r0 /* *((unsigned long*)dst)++ = *((unsigned long*)src)++; */ 214c71562d6Suwe add #-4,REG_LEN /* len -= 4; */ 215c71562d6Suwe cmp/hi r1,REG_LEN 216c71562d6Suwe bt/s 2b 217c71562d6Suwe mov.l r0,@-REG_DST /* } */ 218c71562d6Suwe 219c71562d6Suwe bra ov_no_align_delay 220c71562d6Suwe tst REG_LEN,REG_LEN 221c71562d6Suwe 222c71562d6Suwe 223c71562d6Suweov_word_align: 224c71562d6Suwe mov r1,r0 225c71562d6Suwe tst #1,r0 226c71562d6Suwe bf/s ov_no_align_delay 227c71562d6Suwe tst REG_LEN,REG_LEN /* if ( len == 0 ) return; */ 228c71562d6Suwe bt bcopy_return 229c71562d6Suwe 230c71562d6Suwe 231c71562d6Suwe mov REG_SRC,r0 /* if ( src & 1 ) */ 232c71562d6Suwe tst #1,r0 233c71562d6Suwe bt 1f 234c71562d6Suwe add #-1,REG_SRC 235c71562d6Suwe mov.b @REG_SRC,r0 /* *--dst = *--src; */ 236c71562d6Suwe add #-1,REG_LEN 237c71562d6Suwe mov.b r0,@-REG_DST 238c71562d6Suwe1: 239c71562d6Suwe 240c71562d6Suwe 241c71562d6Suwe mov #1,r1 242c71562d6Suwe cmp/hi r1,REG_LEN /* while ( len > 1 ) { */ 243c71562d6Suwe bf/s ov_no_align_delay 244c71562d6Suwe tst REG_LEN,REG_LEN 245c71562d6Suwe2: 246c71562d6Suwe add #-2,REG_SRC 247c71562d6Suwe mov.w @REG_SRC,r0 /* *--((unsigned short*)dst) = *--((unsigned short*)src); */ 248c71562d6Suwe add #-2,REG_LEN /* len -= 2; */ 249c71562d6Suwe cmp/hi r1,REG_LEN 250c71562d6Suwe bt/s 2b 251c71562d6Suwe mov.w r0,@-REG_DST /* } */ 252c71562d6Suwe 253c71562d6Suwe 254c71562d6Suweov_no_align: 255c71562d6Suwe tst REG_LEN,REG_LEN /* while ( len!= ) { */ 256c71562d6Suweov_no_align_delay: 257c71562d6Suwe bt 9f 258c71562d6Suwe1: 259c71562d6Suwe add #-1,REG_SRC 260c71562d6Suwe mov.b @REG_SRC,r0 /* *--dst = *--src; */ 261c71562d6Suwe add #-1,REG_LEN /* len--; */ 262c71562d6Suwe tst REG_LEN,REG_LEN 263c71562d6Suwe bf/s 1b 264c71562d6Suwe mov.b r0,@-REG_DST /* } */ 265c71562d6Suwe9: 266c71562d6Suwe rts 267c71562d6Suwe#ifdef REG_DST0 268c71562d6Suwe mov REG_DST0,r0 269c71562d6Suwe#else 270c71562d6Suwe nop 
271c71562d6Suwe#endif 272