/* $NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $ */

/*
 * Copyright (c) 2000 SHIMIZU Ryo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
#endif

/*
 * One body, three entry points.  Define exactly one of MEMCOPY, MEMMOVE
 * or BCOPY before assembling to choose which symbol is emitted; with
 * none defined the file builds memcpy.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

/*
 * Register roles.
 *
 *   memcpy/memmove:  r4 = dst, r5 = src, r6 = len
 *                    r3 = copy of the incoming dst, handed back in r0
 *   bcopy:           r4 = src, r5 = dst, r6 = len (r0 is not set)
 *
 * r0 and r1 are used as scratch in every variant.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define REG_DST0	r3
#define REG_SRC		r5
#define REG_DST		r4
#else
#define REG_SRC		r4
#define REG_DST		r5
#endif

#define REG_LEN		r6

/*
 * Copy REG_LEN bytes from REG_SRC to REG_DST.
 *
 * Direction is chosen from the pointer order, so all three entry points
 * cope with overlapping buffers:
 *   src >  dst : ascending copy  (code up to bcopy_return)
 *   src <  dst : descending copy (bcopy_overlap onwards); both pointers
 *                are first advanced to one past the end of the buffers
 *   src == dst : nothing to do
 *
 * Transfer width is chosen from r1 = (src ^ dst) & 3:
 *   0          : src and dst can be 4-byte aligned together; copy a
 *                leading byte/halfword as needed, then longwords
 *   2          : only 2-byte co-alignment is possible; copy an optional
 *                leading byte, then halfwords
 *   1 or 3     : no common alignment; copy byte by byte
 * Whatever remains after the wide loop is finished by the byte loop.
 *
 * NOTE: the instruction following bt/s, bf/s, bra and rts is a delay
 * slot and executes whether or not the branch is taken.  Several
 * branches below rely on this: a "tst REG_LEN,REG_LEN" placed in a
 * delay slot sets T for the "bt" at the branch target or on the
 * fall-through path.  Do not reorder instructions around these branches.
 */
#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* remember dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC		/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC		/* (delay slot) T = ( src > dst ) */
	bf/s	bcopy_overlap		/* src < dst: copy descending */

	mov	REG_SRC,r0		/* (delay slot) */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	word_align		/* non-zero: cannot longword-align both */

longword_align:
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len==0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0			/*      (src & 2) { */
	bt	1f
	mov.w	@REG_SRC+,r0		/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*        len -= 2; */
	mov.w	r0,@REG_DST
	add	#2,REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	mov.l	@REG_SRC+,r0		/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++; */
	add	#-4,REG_LEN		/*   len -= 4; */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST		/* } (delay slot) */

	bra	no_align_delay		/* finish the 0..3 trailing bytes */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


word_align:
	mov	r1,r0
	tst	#1,r0			/* bit 0 of (src ^ dst) set? */
	bf/s	no_align_delay		/* yes: byte copy only */
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	mov.w	@REG_SRC+,r0		/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*   len -= 2; */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST		/* } (delay slot) */


no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
no_align_delay:				/* entered with T = ( len == 0 ) */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0		/*    *dst++ = *src++; */
	add	#-1,REG_LEN		/*    len--; */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST		/* } (delay slot) */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return the original dst */
#else
	nop				/* (delay slot) */
#endif


/*
 * Descending copy, used when src < dst.  Mirrors the ascending code
 * above, but both pointers start one past the end and are decremented
 * before each access.
 */
bcopy_overlap:
	add	REG_LEN,REG_SRC		/* src += len; */
	add	REG_LEN,REG_DST		/* dst += len; */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	ov_word_align		/* non-zero: cannot longword-align both */

ov_longword_align:
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len==0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	add	#-1,REG_SRC		/*    *--dst = *--src; */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0			/*      (src & 2) { */
	bt	1f
	add	#-2,REG_SRC		/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/*        len -= 2; */
	mov.w	r0,@-REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0		/*   *--((unsigned long*)dst) = *--((unsigned long*)src); */
	add	#-4,REG_LEN		/*   len -= 4; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST		/* } (delay slot) */

	bra	ov_no_align_delay	/* finish the 0..3 leading bytes */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


ov_word_align:
	mov	r1,r0
	tst	#1,r0			/* bit 0 of (src ^ dst) set? */
	bf/s	ov_no_align_delay	/* yes: byte copy only */
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*    *--dst = *--src; */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0		/*   *--((unsigned short*)dst) = *--((unsigned short*)src); */
	add	#-2,REG_LEN		/*   len -= 2; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST		/* } (delay slot) */


ov_no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
ov_no_align_delay:			/* entered with T = ( len == 0 ) */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*    *--dst = *--src; */
	add	#-1,REG_LEN		/*    len--; */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST		/* } (delay slot) */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return the original dst */
#else
	nop				/* (delay slot) */
#endif