/*	$NetBSD: memcpy_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(__ARM_EABI__)
STRONG_ALIAS(__aeabi_memcpy, memcpy)
#endif

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with .Lmemcpy_
 * Only a forward (ascending address) copy is implemented in this file;
 * there is no backwards copy path.
 * Separate bits of code are used to deal with the following situations:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Register use:
 *	r0	destination pointer, advanced as data is stored
 *	r1	source pointer, advanced as data is loaded
 *	r2	remaining length, kept biased by the size of the next chunk
 *		so that the sign of the result of each subs/adds says
 *		whether another chunk of that size is available
 *	r3, r12 (ip), lr	scratch / data registers
 *	r4, r5	extra data registers, pushed before use and popped after
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/*
	 * Save the original dst (the return value) and lr up front; this
	 * frees lr for use as a data register in the copy loops below.
	 */
	push	{r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst misalignment */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src misalignment */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* ge: at least 16 bytes still to go */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* rebias: r2 = bytes left - 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	/* conditional on the flags from the adds above or the subsge below */
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* rebias: r2 = bytes left - 4 */
	blt	.Lmemcpy_l4

	subs	r2, r2, #4		/* lt: 4-7 bytes left, ge: 8-11 */
	ldrlt	r3, [r1], #4		/* copy one word ... */
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}		/* ... or two words */
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* remove bias: r2 = bytes left (0-3) */
	/*
	 * NOTE(review): the macro tested below has a single trailing
	 * underscore, unlike the other predefined macros tested in this
	 * file (__ARM_EABI__, __ARMEB__).  If the toolchain spells it
	 * __APCS_26__ the first branch is never assembled - confirm before
	 * relying on it.
	 */
#ifdef __APCS_26_
	ldmiaeq	sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte here */
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1		/* second byte if r2 >= 2 */
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1		/* third byte if r2 == 3 */
	strbgt	r3, [r0], #1
	pop	{r0, pc}		/* return the original dst */

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12		/* account for the bytes just copied */
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src misalignment */
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not; r12 = src & 3 (1, 2 or 3).
	 * src is rounded down to a word boundary and read a word at a
	 * time; each stored word is assembled from the unused bytes of
	 * the previously loaded word (kept in lr) and the leading bytes
	 * of the next one.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4		/* word holding the first src bytes */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* src & 3 == 3 */
	beq	.Lmemcpy_srcul2		/* src & 3 == 2 */
	/* src & 3 == 1: three bytes of each loaded word carry over */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* less than 16 bytes to go */
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* rebias: r2 = bytes left - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* back up over the 3 unused bytes in lr */
	b	.Lmemcpy_l4

	/* src & 3 == 2: two bytes of each loaded word carry over */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* less than 16 bytes to go */
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* rebias: r2 = bytes left - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* back up over the 2 unused bytes in lr */
	b	.Lmemcpy_l4

	/* src & 3 == 3: one byte of each loaded word carries over */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* less than 16 bytes to go */
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* rebias: r2 = bytes left - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* back up over the 1 unused byte in lr */
	b	.Lmemcpy_l4
END(memcpy)