/*	$NetBSD: memcpy_arm.S,v 1.3 2013/01/28 06:23:44 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(__ARM_EABI__)
STRONG_ALIAS(__aeabi_memcpy, memcpy)
#endif

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Register roles throughout:
 *	r0 = dst (advances), r1 = src (advances), r2 = remaining length
 *	r3, r12, lr = scratch; r4, r5 saved on the stack before use.
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias from above */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* 4-7 bytes left: copy one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8-11 bytes left: copy two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26__
	ldmeqia	sp!, {r0, pc}^		/* done: ^ also restores PSR flags */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time; cmp sets GE for >=2, GT for 3 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes to word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/*
	 * erg - unaligned source
	 * This is where it gets nasty ...
	 * The source is word-read at its rounded-down address and the
	 * bytes are shifted into place; r12 (1, 2 or 3) selects the
	 * shift amount via the srcul1/srcul2/srcul3 variants.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3		/* word-align src for ldr/ldm */
	ldr	lr, [r1], #4		/* prime lr with the first word */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* step src back to its true address */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* step src back to its true address */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* step src back to its true address */
	b	.Lmemcpy_l4